Example #1
File: dynety.py  Project: dpressel/baseline
 def _attend(self, query, mask=None):
     query = unsqueeze(query, 0) # ((1, H), B)
     # ((1, H), B) * ((H, T), B) -> ((1, T), B) -> ((T, 1), B)
     attn_scores = dy.transpose(query * self.context)
     if mask is not None:
         attn_scores = dy.cmult(attn_scores, mask[0]) + (mask[1] * dy.scalarInput(-1e9))
     return dy.softmax(attn_scores)
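
The mask handling above keeps the softmax from assigning probability to padded positions: real positions pass through, and padded positions get roughly -1e9 added to their scores before the softmax. A minimal, self-contained sketch of the same idea (toy values and shapes, not the baseline project's API):

import dynet as dy

dy.renew_cg()
scores = dy.inputTensor([1.0, 2.0, 3.0, 0.0])  # raw attention scores; last slot is padding
keep = dy.inputTensor([1.0, 1.0, 1.0, 0.0])    # 1 for real positions, 0 for padding
drop = dy.inputTensor([0.0, 0.0, 0.0, 1.0])    # complement of `keep`
masked = dy.cmult(scores, keep) + drop * -1e9  # padded score is pushed to ~-1e9
print(dy.softmax(masked).npvalue())            # padded position gets ~0 probability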
Example #2
File: attention.py  Project: danielhers/cnn
def generate(in_seq, enc_fwd_lstm, enc_bwd_lstm, dec_lstm):
    embedded = embed_sentence(in_seq)
    encoded = encode_sentence(enc_fwd_lstm, enc_bwd_lstm, embedded)

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    w1 = dy.parameter(attention_w1)
    input_mat = dy.concatenate_cols(encoded)
    w1dt = None

    last_output_embeddings = output_lookup[char2int[EOS]]
    s = dec_lstm.initial_state().add_input(dy.concatenate([dy.vecInput(STATE_SIZE * 2), last_output_embeddings]))

    out = ''
    count_EOS = 0
    for i in range(len(in_seq)*2):
        if count_EOS == 2: break
        # w1dt can be computed and cached once for the entire decoding phase
        w1dt = w1dt or w1 * input_mat
        vector = dy.concatenate([attend(input_mat, s, w1dt), last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector).vec_value()
        next_char = probs.index(max(probs))
        last_output_embeddings = output_lookup[next_char]
        if int2char[next_char] == EOS:
            count_EOS += 1
            continue

        out += int2char[next_char]
    return out
Example #3
File: attention.py  Project: danielhers/cnn
def decode(dec_lstm, vectors, output):
    output = [EOS] + list(output) + [EOS]
    output = [char2int[c] for c in output]

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    w1 = dy.parameter(attention_w1)
    input_mat = dy.concatenate_cols(vectors)
    w1dt = None

    last_output_embeddings = output_lookup[char2int[EOS]]
    s = dec_lstm.initial_state().add_input(dy.concatenate([dy.vecInput(STATE_SIZE*2), last_output_embeddings]))
    loss = []

    for char in output:
        # w1dt can be computed and cached once for the entire decoding phase
        w1dt = w1dt or w1 * input_mat
        vector = dy.concatenate([attend(input_mat, s, w1dt), last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector)
        last_output_embeddings = output_lookup[char]
        loss.append(-dy.log(dy.pick(probs, char)))
    loss = dy.esum(loss)
    return loss
Example #4
def attend(blstm_outputs, h_t, W_c, v_a, W__a, U__a):
    # iterate through input states to compute alphas
    # print 'computing scores...'
    # scores = [W_a * pc.concatenate([h_t, h_input]) for h_input in blstm_outputs]
    scores = [v_a * pc.tanh(W__a * h_t + U__a * h_input) for h_input in blstm_outputs]
    # print 'computed scores'
    # normalize to alphas using softmax
    # print 'computing alphas...'
    alphas = pc.softmax(pc.concatenate(scores))
    # print 'computed alphas...'
    # compute c using alphas
    # print 'computing c...'

    # import time
    # s = time.time()
    # dim = len(blstm_outputs[0].vec_value())
    # stacked_alphas = pc.concatenate_cols([alphas for j in xrange(dim)])
    # stacked_vecs = pc.concatenate_cols([h_input for h_input in blstm_outputs])
    # c = pc.esum(pc.cwise_multiply(stacked_vecs, stacked_alphas))
    # print "stack time:", time.time() - s

    # s = time.time()
    c = pc.esum([h_input * pc.pick(alphas, j) for j, h_input in enumerate(blstm_outputs)])
    # print "pick time:", time.time() - s
    # print 'computed c'
    # print 'c len is {}'.format(len(c.vec_value()))
    # compute output state h~ using c and the decoder's h (global attention variation from Luong et al., 2015)
    # print 'computing h~...'
    h_output = pc.tanh(W_c * pc.concatenate([h_t, c]))
    # print 'len of h_output is {}'.format(len(h_output.vec_value()))
    # print 'computed h~'

    return h_output, alphas, W__a.value()
Example #5
File: attention.py  Project: kamigaito/cnn
def generate(input, enc_fwd_lstm, enc_bwd_lstm, dec_lstm):
    def sample(probs):
        rnd = random.random()
        for i, p in enumerate(probs):
            rnd -= p
            if rnd <= 0: break
        return i

    embedded = embed_sentence(input)
    encoded = encode_sentence(enc_fwd_lstm, enc_bwd_lstm, embedded)

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)

    last_output_embeddings = output_lookup[char2int[EOS]]
    s = dec_lstm.initial_state().add_input(dy.concatenate([dy.vecInput(STATE_SIZE * 2), last_output_embeddings]))
    out = ''
    count_EOS = 0
    for i in range(len(input)*2):
        if count_EOS == 2: break
        vector = dy.concatenate([attend(encoded, s), last_output_embeddings])

        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector)
        probs = probs.vec_value()
        next_char = sample(probs)
        last_output_embeddings = output_lookup[next_char]
        if int2char[next_char] == EOS:
            count_EOS += 1
            continue

        out += int2char[next_char]
    return out
Example #6
File: dy_model.py  Project: jcyk/CWS
    def word_repr(self, char_seq):
        # obtain the word representation when given its character sequence
        wlen = len(char_seq)
        if 'rgW%d'%wlen not in self.param_exprs:
            self.param_exprs['rgW%d'%wlen] = dy.parameter(self.params['reset_gate_W'][wlen-1])
            self.param_exprs['rgb%d'%wlen] = dy.parameter(self.params['reset_gate_b'][wlen-1])
            self.param_exprs['cW%d'%wlen] = dy.parameter(self.params['com_W'][wlen-1])
            self.param_exprs['cb%d'%wlen] = dy.parameter(self.params['com_b'][wlen-1])
            self.param_exprs['ugW%d'%wlen] = dy.parameter(self.params['update_gate_W'][wlen-1])
            self.param_exprs['ugb%d'%wlen] = dy.parameter(self.params['update_gate_b'][wlen-1])
          
        chars = dy.concatenate(char_seq)
        reset_gate = dy.logistic(self.param_exprs['rgW%d'%wlen] * chars + self.param_exprs['rgb%d'%wlen])
        comb = dy.concatenate([dy.tanh(self.param_exprs['cW%d'%wlen] * dy.cmult(reset_gate,chars) + self.param_exprs['cb%d'%wlen]),chars])
        update_logits = self.param_exprs['ugW%d'%wlen] * comb + self.param_exprs['ugb%d'%wlen]
        
        update_gate = dy.transpose(dy.concatenate_cols([dy.softmax(dy.pickrange(update_logits,i*(wlen+1),(i+1)*(wlen+1))) for i in xrange(self.options['ndims'])]))
        
        # The following implementation of the Softmax function is not safe, but faster...
        #exp_update_logits = dy.exp(dy.reshape(update_logits,(self.options['ndims'],wlen+1)))
        #update_gate = dy.cdiv(exp_update_logits, dy.concatenate_cols([dy.sum_cols(exp_update_logits)] *(wlen+1)))
        #assert (not np.isnan(update_gate.npvalue()).any())

        word = dy.sum_cols(dy.cmult(update_gate,dy.reshape(comb,(self.options['ndims'],wlen+1))))
        return word
Example #7
def calc_attention(src_output_matrix, tgt_output_embedding, fixed_attentional_component):
    w1_att_src = dy.parameter(w1_att_src_p)
    w1_att_tgt = dy.parameter(w1_att_tgt_p)
    w2_att = dy.parameter(w2_att_p)
    a_t = dy.transpose(dy.tanh(dy.colwise_add(fixed_attentional_component, w1_att_tgt * tgt_output_embedding))) * w2_att
    alignment = dy.softmax(a_t)
    att_output = src_output_matrix * alignment
    return att_output, alignment
Example #8
def tag_sent(words):
    vecs = build_tagging_graph(words)
    vecs = [dy.softmax(v) for v in vecs]
    probs = [v.npvalue() for v in vecs]
    tags = []
    for prb in probs:
        tag = np.argmax(prb)
        tags.append(vt.i2w[tag])
    return zip(words, tags)
Example #9
File: dynety.py  Project: dpressel/baseline
 def _attend(self, query, mask=None):
     # query ((H), B)
     # mask  ((T, 1), B)
     projected_state = self.decoder * query  # ((H,), B)
     non_lin = dy.tanh(dy.colwise_add(self.context_proj, projected_state))  # ((H, T), B)
     attn_scores = dy.transpose(self.v * non_lin)  # ((1, H), B) * ((H, T), B) -> ((1, T), B) -> ((T, 1), B)
     if mask is not None:
         attn_scores = dy.cmult(attn_scores, mask[0]) + (mask[1] * dy.scalarInput(-1e9))
     return dy.softmax(attn_scores)  # ((T, 1), B)
Example #10
 def __call__(self, x):
     W = dy.parameter(self.mw)
     b = dy.parameter(self.mb)
     W2 = dy.parameter(self.mw2)
     b2 = dy.parameter(self.mb2)
     mlp_output = W2 * (dy.tanh(W * x + b)) + b2
     if fDo_3_Layers:
         W3 = dy.parameter(self.mw3)
         b3 = dy.parameter(self.mb3)
         mlp_output = W3 * (dy.tanh(mlp_output)) + b3
     return dy.softmax(mlp_output)
Example #11
File: rnnlm.py  Project: danielhers/cnn
 def predict_next_word(self, sentence):
     dy.renew_cg()
     init_state = self.builder.initial_state()
     state = init_state
     for cw in sentence:
         # assume word is already a word-id
         x_t = self.lookup[int(cw)]
         state = state.add_input(x_t)
     y_t = state.output()
     r_t = self.bias + (self.R * y_t)
     prob = dy.softmax(r_t)
     return prob
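
The distribution returned by predict_next_word is consumed in two ways throughout these examples: argmax for greedy prediction, or -log(pick(...)) as a training loss. A self-contained sketch of both (toy dimensions, not the rnnlm.py model itself):

import numpy as np
import dynet as dy

m = dy.ParameterCollection()
R = m.add_parameters((5, 8))     # assumed vocab_size x hidden_size
bias = m.add_parameters((5,))

dy.renew_cg()
y_t = dy.inputTensor([0.1] * 8)                 # stand-in for state.output()
prob = dy.softmax(dy.parameter(bias) + dy.parameter(R) * y_t)
best = int(np.argmax(prob.npvalue()))           # greedy next-word prediction
loss = -dy.log(dy.pick(prob, 2))                # cross-entropy loss for gold word id 2
print(best, loss.value())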
Example #12
def generate_sent():
  dy.renew_cg()
  hist = [S] * N
  sent = []
  while True:
    p = dy.softmax(calc_score_of_history(hist)).npvalue()
    next_word = np.random.choice(nwords, p=p/p.sum())
    if next_word == S or len(sent) == MAX_LEN:
      break
    sent.append(next_word)
    hist = hist[1:] + [next_word]
  return sent
Example #13
def predict_output_sequence(model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, W_c, W__a, U__a, v__a, lemma, feats, alphabet_index,
                            inverse_alphabet_index, feat_index, feature_types):
    pc.renew_cg()

    R = pc.parameter(R)
    bias = pc.parameter(bias)
    W_c = pc.parameter(W_c)
    W__a = pc.parameter(W__a)
    U__a = pc.parameter(U__a)
    v__a = pc.parameter(v__a)

    blstm_outputs = encode_feats_and_chars(alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, feat_index,
                                           feat_lookup, feats, feature_types, lemma)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    i = 0
    predicted_sequence = []

    # run the decoder through the sequence and predict characters
    while i < MAX_PREDICTION_LEN:

        # get current h of the decoder
        s = s.add_input(prev_output_vec)
        decoder_rnn_output = s.output()

        # perform attention step
        attention_output_vector, alphas, W = attend(blstm_outputs, decoder_rnn_output, W_c, v__a, W__a, U__a)

        # compute output probabilities
        # print 'computing readout layer...'
        readout = R * attention_output_vector + bias

        # find best candidate output
        probs = pc.softmax(readout)
        next_char_index = common.argmax(probs.vec_value())
        predicted_sequence.append(inverse_alphabet_index[next_char_index])

        # check if reached end of word
        if predicted_sequence[-1] == END_WORD:
            break

        # prepare for the next iteration - "feedback"
        prev_output_vec = char_lookup[next_char_index]
        i += 1

    # remove the end word symbol
    return predicted_sequence[0:-1]
Example #14
File: attention.py  Project: danielhers/cnn
def attend(input_mat, state, w1dt):
    global attention_w2
    global attention_v
    w2 = dy.parameter(attention_w2)
    v = dy.parameter(attention_v)

    # input_mat: (encoder_state x seqlen) => input vecs concatenated as cols
    # w1dt: (attdim x seqlen)
    # w2dt: (attdim x attdim)
    w2dt = w2*dy.concatenate(list(state.s()))
    # att_weights: (seqlen,) row vector
    unnormalized = dy.transpose(v * dy.tanh(dy.colwise_add(w1dt, w2dt)))
    att_weights = dy.softmax(unnormalized)
    # context: (encoder_state)
    context = input_mat * att_weights
    return context
Example #15
File: attention.py  Project: kamigaito/cnn
def attend(input_vectors, state):
    global attention_w1
    global attention_w2
    global attention_v
    w1 = dy.parameter(attention_w1)
    w2 = dy.parameter(attention_w2)
    v = dy.parameter(attention_v)
    attention_weights = []

    w2dt = w2*dy.concatenate(list(state.s()))
    for input_vector in input_vectors:
        attention_weight = v*dy.tanh(w1*input_vector + w2dt)
        attention_weights.append(attention_weight)
    attention_weights = dy.softmax(dy.concatenate(attention_weights))
    output_vectors = dy.esum([vector*attention_weight for vector, attention_weight in zip(input_vectors, attention_weights)])
    return output_vectors
Example #16
    def translate_sentence(self, sent):
        dy.renew_cg()

        W_y = dy.parameter(self.W_y)
        b_y = dy.parameter(self.b_y)

        sent_rev = list(reversed(sent))
        # Bidirectional representations
        l2r_state = self.l2r_builder.initial_state()
        r2l_state = self.r2l_builder.initial_state()
        l2r_contexts = []
        r2l_contexts = []
        for (cw_l2r, cw_r2l) in zip(sent, sent_rev):
            l2r_state = l2r_state.add_input(
                dy.lookup(self.src_lookup, self.src_token_to_id[cw_l2r]))
            r2l_state = r2l_state.add_input(
                dy.lookup(self.src_lookup, self.src_token_to_id[cw_r2l]))
            l2r_contexts.append(l2r_state.output())
            r2l_contexts.append(r2l_state.output())
        r2l_contexts.reverse()

        h_fs = []
        for (l2r_i, r2l_i) in zip(l2r_contexts, r2l_contexts):
            h_fs.append(dy.concatenate([l2r_i, r2l_i]))
        h_fs_matrix = dy.concatenate_cols(h_fs)

        # Decoder
        trans_sentence = ['<S>']
        cw = trans_sentence[-1]
        c_t = dy.vecInput(self.hidden_size * 2)
        start = dy.concatenate(
            [dy.lookup(self.tgt_lookup, self.tgt_token_to_id['<S>']), c_t])
        dec_state = self.dec_builder.initial_state().add_input(start)
        while len(trans_sentence) < self.max_len:
            h_e = dec_state.output()
            c_t = self.__attention_mlp(h_fs_matrix, h_e)
            embed_t = dy.lookup(self.tgt_lookup, self.tgt_token_to_id[cw])
            x_t = dy.concatenate([embed_t, c_t])
            dec_state = dec_state.add_input(x_t)
            y_star = b_y + W_y * dec_state.output()
            p = dy.softmax(y_star)
            cw = self.tgt_id_to_token[np.argmax(p.npvalue())]
            if cw == '</S>':
                break
            trans_sentence.append(cw)
        return ' '.join(trans_sentence[1:])
Example #17
    def decoding(self, src_encodings):
        src_len = len(src_encodings)

        # NOTE: should transpose before calling `mst` method!
        s_arc, s_label = self.cal_scores(src_encodings)
        s_arc_values = dy.softmax(s_arc).npvalue().transpose()  # src_len, src_len
        s_label_values = np.asarray([x.npvalue() for x in s_label]).transpose((2, 1, 0))  # src_len, src_len, n_labels

        # weights = np.zeros((src_len + 1, src_len + 1))
        # weights[0, 1:(src_len + 1)] = np.inf
        # weights[1:(src_len + 1), 0] = np.inf
        # weights[1:(src_len + 1), 1:(src_len + 1)] = s_arc_values[batch]
        weights = s_arc_values
        pred_heads = mst(weights)
        pred_labels = [np.argmax(labels[head]) for head, labels in zip(pred_heads, s_label_values)]

        return pred_heads, pred_labels
Example #18
 def decode(self, features):
     last_output_embeddings = self.pattern_embeddings[0]
     s = self.decoder_lstm.initial_state().add_input(dy.concatenate([dy.vecInput(self.hidden_dim), last_output_embeddings]))
     out = []
     for i in range(self.max_rule_length):
         h_t = s.output()
         context = self.attend(features, h_t)
         out_vector = self.pt * dy.concatenate([context, h_t]) + self.pt_bias
         probs = dy.softmax(out_vector).vec_value()
         last_output = probs.index(max(probs))
         last_output_embeddings = self.pattern_embeddings[last_output]
         s = s.add_input(dy.concatenate([context, last_output_embeddings]))
         if last_output != 0:
             out.append(last_output)
         else:
             return out
     return out
Example #19
def attend2(blstm_outputs, s_prev, y_feedback, v_a, W_a, U_a, U_o, V_o, C_o):

    # attention mechanism - Bahdanau style
    # iterate through input states to compute alphas
    # print 'computing scores...'

    # W_a: hidden x hidden, U_a: hidden x 2 hidden, v_a: hidden, each score: scalar
    scores = [v_a * pc.tanh(W_a * s_prev + U_a * h_j) for h_j in blstm_outputs]
    alphas = pc.softmax(pc.concatenate(scores))

    # c_i: 2 hidden
    c_i = pc.esum([h_input * pc.pick(alphas, j) for j, h_input in enumerate(blstm_outputs)])

    # U_o = 2l x hidden, V_o = 2l x input, C_o = 2l x 2 hidden
    attention_output_vector = U_o * s_prev + V_o * y_feedback + C_o * c_i

    return attention_output_vector, alphas
Example #20
    def decode(self, pre_encoded, pos_encoded, refex, entity):
        refex = list(refex)
        refex = [self.token2int[c] for c in refex]

        h_pre = dy.concatenate_cols(pre_encoded)
        w1dt_pre = None

        h_pos = dy.concatenate_cols(pos_encoded)
        w1dt_pos = None

        last_output_embeddings = self.lookup[self.token2int[self.EOS]]
        entity_embedding = self.lookup[self.token2int[entity]]

        s = self.dec_lstm.initial_state().add_input(
            dy.concatenate([
                dy.vecInput(self.config.state_dim * 4), last_output_embeddings,
                entity_embedding
            ]))
        loss = []

        for word in refex:
            # w1dt can be computed and cached once for the entire decoding phase
            w1dt_pre = w1dt_pre or self.attention_w1_pre * h_pre
            w1dt_pos = w1dt_pos or self.attention_w1_pos * h_pos

            attention_pre, _ = self.attend(h_pre, s, w1dt_pre,
                                           self.attention_w2_pre,
                                           self.attention_v_pre)
            attention_pos, _ = self.attend(h_pos, s, w1dt_pos,
                                           self.attention_w2_pos,
                                           self.attention_v_pos)

            vector = dy.concatenate([
                attention_pre, attention_pos, last_output_embeddings,
                entity_embedding
            ])
            s = s.add_input(vector)
            out_vector = self.decoder_w * s.output() + self.decoder_b
            probs = dy.softmax(out_vector)
            prob = dy.pick(probs, word)
            last_output_embeddings = self.lookup[word]
            loss.append(-dy.log(prob))

        loss = dy.esum(loss)
        return loss
Example #21
    def calc_loss(self, translator, src, trg):
        batch_size = trg.batch_size()
        uniques = [set() for _ in range(batch_size)]
        deltas = []
        probs = []
        sign = -1 if self.inv_eval else 1
        search_outputs = translator.generate_search_output(
            src, self.search_strategy)
        for search_output in search_outputs:
            logprob = search_output.logsoftmaxes
            sample = search_output.word_ids
            attentions = search_output.attentions

            logprob = dy.esum(logprob) * self.alpha
            # Calculate the evaluation score
            eval_score = np.zeros(batch_size, dtype=float)
            mask = np.zeros(batch_size, dtype=float)
            for j in range(batch_size):
                ref_j = self.remove_eos(trg[j].words)
                hyp_j = self.remove_eos(sample[j].tolist())
                if self.unique_sample:
                    hash_val = hash(tuple(hyp_j))
                    if len(hyp_j) == 0 or hash_val in uniques[j]:
                        mask[j] = -1e20  # represents negative infinity
                        continue
                    else:
                        uniques[j].add(hash_val)
                    # Calc evaluation score
                eval_score[j] = self.evaluation_metric.evaluate_one_sent(
                    ref_j, hyp_j) * sign
            # Appending the delta and logprob of this sample
            prob = logprob + dy.inputTensor(mask, batched=True)
            deltas.append(dy.inputTensor(eval_score, batched=True))
            probs.append(prob)
        sample_prob = dy.softmax(dy.concatenate(probs))
        deltas = dy.concatenate(deltas)
        risk = dy.sum_elems(dy.cmult(sample_prob, deltas))

        ### Debug
        #print(sample_prob.npvalue().transpose()[0])
        #print(deltas.npvalue().transpose()[0])
        #print("----------------------")
        ### End debug

        return FactoredLossExpr({"risk": risk})
Example #22
    def train(self, trainning_set):
        loss_chunk = 0
        loss_all = 0
        total_chunk = 0
        total_all = 0
        losses = []
        for datapoint in trainning_set:

            premise = datapoint["premise"]
            hypothesis = datapoint["hypothesis"]
            gold_label = datapoint["gold_label"]

            ep = self.encode_sentence(premise)
            eh = self.encode_sentence(hypothesis)

            Ps = []
            for i in range(self.projection_size):
                Ps.append(self.Phis[i].expr() * ep)
            P = dy.transpose(dy.concatenate_cols(Ps))
            s = P * eh
            y = dy.softmax(self.W.expr() * s + self.b.expr())

            losses.append(-dy.log(dy.pick(y, gold_label)))

            # process losses in chunks
            if len(losses) > 50:
                loss = dy.esum(losses)
                l = loss.scalar_value()
                loss.backward()
                self.trainer.update()
                dy.renew_cg()
                losses = []
                loss_chunk += l
                loss_all += l
                total_chunk += 1
                total_all += 1

        # consider any remaining losses
        if len(losses) > 0:
            loss = dy.esum(losses)
            loss.scalar_value()
            loss.backward()
            self.trainer.update()
            dy.renew_cg()
        print(f'loss: {loss_all/total_all:.4f}')
Example #23
    def word_repr(self, char_seq):
        # obtain the word representation when given its character sequence
        wlen = len(char_seq)
        if 'rgW%d' % wlen not in self.param_exprs:
            self.param_exprs['rgW%d' % wlen] = dy.parameter(
                self.params['reset_gate_W'][wlen - 1])
            self.param_exprs['rgb%d' % wlen] = dy.parameter(
                self.params['reset_gate_b'][wlen - 1])
            self.param_exprs['cW%d' % wlen] = dy.parameter(
                self.params['com_W'][wlen - 1])
            self.param_exprs['cb%d' % wlen] = dy.parameter(
                self.params['com_b'][wlen - 1])
            self.param_exprs['ugW%d' % wlen] = dy.parameter(
                self.params['update_gate_W'][wlen - 1])
            self.param_exprs['ugb%d' % wlen] = dy.parameter(
                self.params['update_gate_b'][wlen - 1])

        chars = dy.concatenate(char_seq)
        reset_gate = dy.logistic(self.param_exprs['rgW%d' % wlen] * chars +
                                 self.param_exprs['rgb%d' % wlen])
        comb = dy.concatenate([
            dy.tanh(self.param_exprs['cW%d' % wlen] *
                    dy.cmult(reset_gate, chars) +
                    self.param_exprs['cb%d' % wlen]), chars
        ])
        update_logits = self.param_exprs[
            'ugW%d' % wlen] * comb + self.param_exprs['ugb%d' % wlen]

        update_gate = dy.transpose(
            dy.concatenate_cols([
                dy.softmax(
                    dy.pickrange(update_logits, i * (wlen + 1),
                                 (i + 1) * (wlen + 1)))
                for i in xrange(self.options['ndims'])
            ]))

        # The following implementation of the Softmax function is not safe, but faster...
        #exp_update_logits = dy.exp(dy.reshape(update_logits,(self.options['ndims'],wlen+1)))
        #update_gate = dy.cdiv(exp_update_logits, dy.concatenate_cols([dy.sum_cols(exp_update_logits)] *(wlen+1)))
        #assert (not np.isnan(update_gate.npvalue()).any())

        word = dy.sum_cols(
            dy.cmult(update_gate,
                     dy.reshape(comb, (self.options['ndims'], wlen + 1))))
        return word
Example #24
    def __call__(self, dec_hidden_state, xh_vecs):
        # xh_vecs: one vector for each input token
        # dec_hidden_state: the current decoder hidden state, one vector for each layer
        if not xh_vecs:
            # TODO: fix this hack, possibly by padding xh_vecs with bos and eos
            return dy.vecInput(self.z_dim)
        s = dy.concatenate(list(dec_hidden_state))
        W = dy.parameter(self.p_W)
        b = dy.parameter(self.p_b)
        UV = dy.parameter(self.p_UV)
        v = dy.parameter(self.p_v)

        vT = dy.transpose(v)

        Ws = W * s
        attn_weights = [vT * dy.tanh(Ws + UV * xh + b) for xh in xh_vecs]
        attn_dist = dy.softmax(dy.concatenate(attn_weights))
        return dy.concatenate_cols(xh_vecs) * attn_dist, attn_dist
Example #25
    def compute_loss(self, in_sentence, out_sentence):
        from numpy import argmax
        dn.renew_cg()
        lookup, R, C, bias, encoder, decoder = self.get_params()
        in_s, out_s = self.wrap_sentence(in_sentence), self.wrap_sentence(
            out_sentence)

        loss = []
        enc_s, _ = input_all(encoder.initial_state(),
                             [lookup[c] for c in in_s])
        s = decoder.initial_state().add_input(enc_s.output())
        for char, next_char in zip(out_s, out_s[1:]):
            s = s.add_input(lookup[char])
            probs = dn.softmax(R * s.output() + bias)
            loss.append(-dn.log(dn.pick(probs, next_char)))
            # loss.append( dn.pickneglogsoftmax(probs,next_char) )
        loss = dn.esum(loss)
        return loss
Example #26
    def _attend(self, input_vectors, state_fw, state_bw):
        w1 = self.att_w1.expr()
        w2 = self.att_w2.expr()
        v = self.att_v.expr()
        attention_weights = []

        w2dt = w2 * dy.concatenate([state_fw.h()[-1], state_bw.h()[-1]])

        for input_vector in input_vectors:
            attention_weight = v * dy.tanh(w1 * input_vector + w2dt)
            attention_weights.append(attention_weight)

        attention_weights = dy.softmax(dy.concatenate(attention_weights))

        output_vectors = \
            dy.esum([vector * attention_weight for vector, attention_weight in zip(input_vectors, attention_weights)])

        return output_vectors
Example #27
 def tag_sent(self, words, trans=True):
     self.eval = True
     if trans:
         if self.en_trans:
             self.en_trans.transliterate('\n'.join(set(words)))
             self.etrans = self.en_trans.trans_dict
         if self.hi_trans:
             self.hi_trans.transliterate('\n'.join(set(words)))
             self.htrans = self.hi_trans.trans_dict
     dy.renew_cg()
     vecs = self.build_tagging_graph(words)
     vecs = [dy.softmax(v) for v in vecs]
     probs = [v.npvalue() for v in vecs]
     tags = []
     for prb in probs:
         tag = np.argmax(prb)
         tags.append(self.meta.i2t[tag])
     return zip(words, tags)
Example #28
 def generate_top_n(logProb, state, words, wordID, n):
     if words[-1] == endSymbol:
         yield logProb, words
     h_e = state.output()
     c_t, unkIndex = self.__attention_mlp(h_fs_matrix, h_e)
     embed_t = dy.lookup(self.tgt_lookup, wordID)
     x_t = dy.concatenate([embed_t, c_t])
     next_state = state.add_input(x_t)
     y_star = np.reshape(
         dy.softmax(W_y * next_state.output() + b_y).npvalue(), -1)
     for nextWordID in np.argpartition(-y_star, n)[:n]:  # indices of the n most probable words
         currentWord = self.tgt_id_to_token[nextWordID]
         if currentWord == unkSymbol:
             currentWord = self.src_id_to_token[unkIndex]
         currentLogProb = logProb + np.log(y_star[nextWordID])
         newWords = words + [currentWord]
         yield currentLogProb, generate_top_n(currentLogProb, next_state, newWords,
                                              nextWordID, n), newWords
Example #29
def decode(embedded, wf):
    wf = list(wf) + [EOS]
    wf = [char2int[c] for c in wf]
    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    last_output_embeddings = lookup[char2int[EOS]]
    s = generator.initial_state().add_input(dy.concatenate([embedded,
                                                            last_output_embeddings]))
    loss = []
    for char in wf:
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector)
        last_output_embeddings = lookup[char]
        loss.append(-dy.log(dy.pick(probs, char)))
        s = s.add_input(dy.concatenate([embedded,last_output_embeddings]))
    loss = dy.esum(loss)
    return loss
Example #30
    def loss(self, observation, instance):
        #trans = instance.transformation
        #if trans not in self.known_transformations:
        #k    newtrans = list(self.param_dict.keys())[0][0] ### SUPER ARBITRARY
        #k    tqdm.write("WARNING: unknown transformtion picked for instance {}; using transformation {}".format(trans, newtrans))
        #k    trans = newtrans

        trans = 'lul'
        b = dy.parameter(self.param_dict[(trans, 'b')])
        W = dy.parameter(self.param_dict[(trans, 'W')])

        features, label = observation

        prediction = dy.softmax(dy.affine_transform([b, W, dy.inputVector(features)]))

        loss = -dy.log(dy.pick(prediction, label))

        return prediction, loss
Example #31
    def run(self, question, image):
        image_conv = self.convnet(image)

        embeddings = self.embed_question(question, image_conv)
        h0 = dy.concatenate([self.lookup[self.word2id[self.EOS]], image_conv])

        init_state = self.enc_fwd_lstm.initial_state().add_input(h0)
        fwd_vectors = self.run_lstm(init_state, embeddings)

        embeddings_rev = list(reversed(embeddings))
        init_state = self.enc_bwd_lstm.initial_state().add_input(h0)
        bwd_vectors = self.run_lstm(init_state, embeddings_rev)
        bwd_vectors = list(reversed(bwd_vectors))

        vector = dy.average(
            [dy.concatenate(list(p)) for p in zip(fwd_vectors, bwd_vectors)])

        return dy.softmax(self.W * vector + self.b)
Example #32
def do_one_sequence(rnn, params, sequence):
    # setup the sequence
    dy.renew_cg()
    s0 = rnn.initial_state()

    R = params["R"]
    bias = params["bias"]
    lookup = params["lookup"]
    input_sequence = [input_token2int[t] for (t, _) in sequence]
    output_sequence = [output_token2int[t] for (_, t) in sequence]
    s = s0
    loss = []
    for input_token, output_token in zip(input_sequence, output_sequence):
        s = s.add_input(lookup[input_token])
        probs = dy.softmax(R * s.output() + bias)
        loss.append(-dy.log(dy.pick(probs, output_token)))
    loss = dy.esum(loss)
    return loss
Example #33
    def predict_next(self):
        (R, bias, W_c, W__a, U__a, v__a) = self.cg_params

        # soft attention vector
        att_scores = [
            v__a * dy.tanh(W__a * self.output_state + U__a * h_input)
            for h_input in self.biencoder
        ]
        alphas = dy.softmax(dy.concatenate(att_scores))
        c = dy.esum([
            h_input * dy.pick(alphas, j)
            for j, h_input in enumerate(self.biencoder)
        ])

        # softmax over vocabulary
        h_output = dy.tanh(W_c * dy.concatenate([self.output_state, c]))
        self.logprobs = (dy.log_softmax(R * h_output + bias)).npvalue()
        return self.logprobs
Example #34
def predict(lstm, params, line, y):
    dy.renew_cg()
    s0 = lstm.initial_state()
    R = params["R"]
    bias = params["bias"]
    lookup = params["lookup"]
    sentence = ["<EOS>"] + list(line) + ["<EOS>"]
    sentence = [char2int[c] for c in sentence]
    s = s0

    for char in sentence:
        s = s.add_input(lookup[char])

    lstm_out = (R * s.output()) + bias
    yhat = dy.softmax(mlp(lstm_out, params))
    loss = -(dy.log(dy.pick(yhat, y)))

    return loss, yhat
Example #35
    def test_sentence(self, words, word_idxs):
        dy.renew_cg()
        forward_init, backward_init = [
            b.initial_state() for b in self.builders
        ]
        embed_words = words.tensor
        # entities = words.ents
        forward = forward_init.transduce(embed_words)
        backward = backward_init.transduce(reversed(embed_words))

        predictions = []
        for f, b in zip(forward, backward):
            r_t = self(dy.concatenate([f, b]))
            temp_val = dy.softmax(r_t).value()
            # chosen = np.argmax(temp_val)
            predictions.append(temp_val)

        return predictions
Example #36
    def _step(self, prev_samples, encoder_output, decoder_state, prev_att,
              prev_att_expr, runtime, compute_attention):
        if prev_att is None:
            prev_att = dy.inputVector([0] * len(encoder_output))
        else:
            prev_att = dy.inputVector(
                prev_att
            )  #this truncates backpropagation - don't know if it is ok to do that

        #input from receptive network
        while len(prev_samples) < self.config.receptive_input:
            prev_samples = [0] + prev_samples
        input_vect2 = dy.inputVector(
            prev_samples[-self.config.receptive_input:])
        input_vect3 = dy.inputVector(
            prev_samples[-self.config.sample_trail_size:])
        for w, b in zip(self.receptive_w, self.receptive_b):
            input_vect2 = dy.rectify(w.expr() * input_vect2 + b.expr())
            if not runtime:
                input_vect2 = dy.dropout(input_vect2,
                                         self.config.receptive_dropout)
        #input from encoder
        if compute_attention or prev_att_expr is None:
            att_vect = dy.inputVector(
                prev_samples[-self.config.receptive_input:])
            for w, b in zip(self.attention_w, self.attention_b):
                att_vect = dy.rectify(w.expr() * att_vect + b.expr())
        else:
            att_vect = None

        input_vect1, prev_att = self._attend(encoder_output, decoder_state,
                                             prev_att, prev_att_expr, att_vect,
                                             compute_attention)

        decoder_state = decoder_state.add_input(
            dy.concatenate([input_vect1, input_vect2]))
        presoftmax = dy.concatenate(
            [decoder_state.output(), input_vect2, input_vect3])
        for w, b in zip(self.presoftmax_w, self.presoftmax_b):
            presoftmax = dy.rectify(w.expr() * presoftmax + b.expr())

        softmax = dy.softmax(self.softmax_w.expr() * presoftmax +
                             self.softmax_b.expr())
        return softmax, decoder_state, prev_att.value(), prev_att
Example #37
    def generate(self, sentence):
        #embedded = embed_sentence(in_seq)
        encoded = self.encode_sentence(sentence)

        w = dy.parameter(self.decoder_w)
        b = dy.parameter(self.decoder_b)
        w1 = dy.parameter(self.attention_w1)
        dw = dy.parameter(self.duration_weight)
        db = dy.parameter(self.duration_bias)
        #duration = dw * state.output() + db
        input_mat = dy.concatenate_cols(encoded)
        w1dt = None

        last_output_embeddings = self.output_lookup[2]
        s = self.dec_lstm.initial_state().add_input(
            dy.concatenate(
                [dy.vecInput(self.state_size * 2), last_output_embeddings]))

        out = ''
        res = []
        dur_g = []
        count_EOS = 0
        for i in range(len(sentence)):
            if count_EOS == 2: break
            # w1dt can be computed and cached once for the entire decoding phase
            w1dt = w1dt or w1 * input_mat
            vector = dy.concatenate(
                [self.attend(input_mat, s, w1dt), last_output_embeddings])
            s = s.add_input(vector)
            #k = s
            #dloss = self.test_duration(k, i, b)
            out_vector = w * s.output() + b
            dur_pred = dw * s.output() + db
            probs = dy.softmax(out_vector).vec_value()
            next_word = probs.index(max(probs))
            last_output_embeddings = self.output_lookup[next_word]
            if next_word == 2:
                count_EOS += 1
                continue
            res.append(next_word)
            dur_g.append(dy.rectify(dur_pred))

            #out += int2char[next_word]
        return res, dur_g
Example #38
    def generate(self, pre_context, pos_context, entity):
        embedded = self.embed_sentence(pre_context)
        pre_encoded = self.encode_sentence(self.encpre_fwd_lstm,
                                           self.encpre_bwd_lstm, embedded)

        embedded = self.embed_sentence(pos_context)
        pos_encoded = self.encode_sentence(self.encpos_fwd_lstm,
                                           self.encpos_bwd_lstm, embedded)

        w = dy.parameter(self.decoder_w)
        b = dy.parameter(self.decoder_b)

        last_output_embeddings = self.output_lookup[self.output2int[self.EOS]]
        try:
            entity_embedding = self.input_lookup[self.input2int[entity]]
        except:
            entity_embedding = self.input_lookup[self.input2int[self.EOS]]
        s = self.dec_lstm.initial_state().add_input(
            dy.concatenate([
                pre_encoded, pos_encoded, last_output_embeddings,
                entity_embedding
            ]))

        out = []
        count_EOS = 0
        for i in range(self.config['GENERATION']):
            if count_EOS == 2: break

            vector = dy.concatenate([
                pre_encoded, pos_encoded, last_output_embeddings,
                entity_embedding
            ])
            s = s.add_input(vector)
            out_vector = w * s.output() + b
            probs = dy.softmax(out_vector).vec_value()
            next_word = probs.index(max(probs))
            last_output_embeddings = self.output_lookup[next_word]
            if self.int2output[next_word] == self.EOS:
                count_EOS += 1
                continue

            out.append(self.int2output[next_word])

        return out
Example #39
def attend(input_vectors, state):
    global attention_w1
    global attention_w2
    global attention_v
    w1 = pc.parameter(attention_w1)
    w2 = pc.parameter(attention_w2)
    v = pc.parameter(attention_v)
    attention_weights = []
    w2dt = w2 * pc.concatenate(list(state.s()))

    for input_vector in input_vectors:
        attention_weight = v * pc.tanh(w1 * input_vector + w2dt)
        attention_weights.append(attention_weight)
    attention_weights = pc.softmax(pc.concatenate(attention_weights))
    output_vectors = pc.esum([
        vector * attention_weight
        for vector, attention_weight in zip(input_vectors, attention_weights)
    ])
    return output_vectors
Example #40
def attend2(blstm_outputs, s_prev, y_feedback, v_a, W_a, U_a, U_o, V_o, C_o):

    # attention mechanism - Bahdanau style
    # iterate through input states to compute alphas
    # print 'computing scores...'

    # W_a: hidden x hidden, U_a: hidden x 2 hidden, v_a: hidden, each score: scalar
    scores = [v_a * pc.tanh(W_a * s_prev + U_a * h_j) for h_j in blstm_outputs]
    alphas = pc.softmax(pc.concatenate(scores))

    # c_i: 2 hidden
    c_i = pc.esum([
        h_input * pc.pick(alphas, j) for j, h_input in enumerate(blstm_outputs)
    ])

    # U_o = 2l x hidden, V_o = 2l x input, C_o = 2l x 2 hidden
    attention_output_vector = U_o * s_prev + V_o * y_feedback + C_o * c_i

    return attention_output_vector, alphas
Example #41
  def sample(self, x: dy.Expression, n: numbers.Integral, temperature: numbers.Real=1.0):
    assert temperature != 0.0
    scores_expr = self.calc_log_probs(x)
    if temperature != 1.0:
      scores_expr *= 1.0 / temperature
      scores = dy.softmax(scores_expr).npvalue()
    else:
      scores = dy.exp(scores_expr).npvalue()

    # Numpy is very picky. If the sum is off even by 1e-8 it complains.
    scores /= sum(scores)

    a = range(scores.shape[0])
    samples = np.random.choice(a, (n,), replace=True, p=scores)

    r = []
    for word in samples:
      r.append((word, dy.pick(scores_expr, word)))
    return r
Example #42
    def attend(self, input_vectors, state, batch_size):

        w1 = dynet.parameter(self.attention_w1)
        w2 = dynet.parameter(self.attention_w2)
        v = dynet.parameter(self.attention_v)

        src_len = len(input_vectors)

        # enc_size, sent_len, batch_size
        src_enc_all = dynet.concatenate_cols(input_vectors)

        att_hidden = dynet.tanh(dynet.colwise_add(w1 * src_enc_all, w2 * state))
        att_weights = dynet.reshape(v * att_hidden, (src_len, ), batch_size)
        # sent_len, batch_size
        att_weights = dynet.softmax(att_weights)

        output_vectors = src_enc_all * att_weights

        return output_vectors, att_weights
Example #43
    def attention(self, src_encodings, h_t, batch_size):
        W1_att_f = dy.parameter(self.W1_att_f)
        W1_att_e = dy.parameter(self.W1_att_e)
        W2_att = dy.parameter(self.W2_att)

        src_len = len(src_encodings)

        # enc_size, sent_len, batch_size
        src_enc_all = dy.concatenate_cols(src_encodings)

        att_hidden = dy.tanh(
            dy.colwise_add(W1_att_f * src_enc_all, W1_att_e * h_t))
        att_weights = dy.reshape(W2_att * att_hidden, (src_len, ), batch_size)
        # sent_len, batch_size
        att_weights = dy.softmax(att_weights)

        ctx = src_enc_all * att_weights

        return ctx, att_weights
Example #44
File: mtl.py  Project: anbasile/mtl-tagging
def tag_sent(sent, builders):
    dy.renew_cg()
    f_init, b_init = [b.initial_state() for b in builders]
    wembs = [E[vw.w2i.get(w, UNK)] for w,t in sent]

    fw = [x.output() for x in f_init.add_inputs(wembs)]
    bw = [x.output() for x in b_init.add_inputs(reversed(wembs))]

    H = dy.parameter(pH)
    O = dy.parameter(pO)

    tags=[]
    for f,b,(w,t) in zip(fw,reversed(bw),sent):
        r_t = O*(dy.tanh(H * dy.concatenate([f,b])))
        # r_t = O*dy.concatenate([f,b])
        out = dy.softmax(r_t)
        chosen = np.argmax(out.npvalue())
        tags.append(vt.i2w[chosen])
    return tags
Example #45
def decode(dec_lstm, vectors, output):
    output = [EOS] + list(output) + [EOS]
    output = [char2int[c] for c in output]

    w = pc.parameter(decoder_w)
    b = pc.parameter(decoder_b)

    s = dec_lstm.initial_state().add_input(pc.vecInput(STATE_SIZE*2))

    loss = []
    for char in output:
        vector = attend(vectors, s)

        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = pc.softmax(out_vector)
        loss.append(-pc.log(pc.pick(probs, char)))
    loss = pc.esum(loss)
    return loss
Example #46
 def sample(self, eos, max_len):
     #dy.renew_cg()
     #self.new_graph()
     state = self.rnn.initial_state()
     state = state.set_s(self.initial_state)
     sent = []
     while len(sent) < max_len:
         assert state != None
         so = state.output()
         assert so != None
         output_dist = dy.softmax(self.output_mlp(so))
         output_dist = output_dist.vec_value()
         word = sample(output_dist)
         sent.append(word)
         if word == eos:
             break
         word_emb = self.embed_word(word)
         state = state.add_input(word_emb)
     return sent
Example #47
    def _attend(self, input_vectors, state):
        w1 = self.att_w1.expr(update=True)
        w2 = self.att_w2.expr(update=True)
        v = self.att_v.expr(update=True)
        attention_weights = []

        w2dt = w2 * state.h()[-1]
        for input_vector in input_vectors:
            attention_weight = v * dy.tanh(w1 * input_vector + w2dt)
            attention_weights.append(attention_weight)

        attention_weights = dy.softmax(dy.concatenate(attention_weights))

        output_vectors = dy.esum([
            vector * attention_weight for vector, attention_weight in zip(
                input_vectors, attention_weights)
        ])

        return output_vectors
Example #48
File: attention.py  Project: kamigaito/cnn
def decode(dec_lstm, vectors, output):
    output = [EOS] + list(output) + [EOS]
    output = [char2int[c] for c in output]

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)

    last_output_embeddings = output_lookup[char2int[EOS]]
    s = dec_lstm.initial_state().add_input(dy.concatenate([dy.vecInput(STATE_SIZE*2), last_output_embeddings]))
    loss = []
    for char in output:
        vector = dy.concatenate([attend(vectors, s), last_output_embeddings])

        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector)
        last_output_embeddings = output_lookup[char]
        loss.append(-dy.log(dy.pick(probs, char)))
    loss = dy.esum(loss)
    return loss
Example #49
def create_network_return_best(inputs):
    '''
    inputs is a list of numbers
    '''
    dy.renew_cg()
    W = dy.parameter(pW)
    b = dy.parameter(pB)

    if(len(inputs) > documentLength):
       inputs = inputs[0:documentLength]    
    
    emb_vectors = [lookup[i] for i in inputs]
    
    while(len(emb_vectors) < documentLength):
        pad = dy.vecInput(embDimension)
        pad.set(np.zeros(embDimension))
        emb_vectors.append(pad)    
    
    net_input = dy.concatenate(emb_vectors)
    net_output = dy.softmax( (W*net_input) + b)
    return np.argmax(net_output.npvalue())
Example #50
File: rnnlm.py  Project: danielhers/cnn
    def sample(self, first=1, nchars=0, stop=-1):
        res = [first]
        dy.renew_cg()
        state = self.builder.initial_state()

        cw = first
        while True:
            x_t = self.lookup[cw]
            state = state.add_input(x_t)
            y_t = state.output()
            r_t = self.bias + (self.R * y_t)
            ydist = dy.softmax(r_t)
            dist = ydist.vec_value()
            rnd = random.random()
            for i,p in enumerate(dist):
                rnd -= p
                if rnd <= 0: break
            res.append(i)
            cw = i
            if cw == stop: break
            if nchars and len(res) > nchars: break
        return res
Example #51
def create_network_return_loss(inputs, expected_output):
    '''
    inputs is a list of numbers
    '''
    dy.renew_cg()
    W = dy.parameter(pW) # from parameters to expressions
    b = dy.parameter(pB)
    
    if(len(inputs) > documentLength):
       inputs = inputs[0:documentLength]
    
    emb_vectors = [lookup[i] for i in inputs]
    
    while(len(emb_vectors) < documentLength):
        pad = dy.vecInput(embDimension)
        pad.set(np.zeros(embDimension))
        emb_vectors.append(pad)
    
    net_input = dy.concatenate(emb_vectors)
    net_output = dy.softmax( (W*net_input) + b)
    loss = -dy.log(dy.pick(net_output, expected_output))
    return loss
Example #52
File: minibatch.py  Project: danielhers/cnn
# regular lookup
a = lp[1].npvalue()
b = lp[2].npvalue()
c = lp[3].npvalue()

# batch lookup instead of single elements.
# two ways of doing this.
abc1 = dy.lookup_batch(lp, [1,2,3])
print(abc1.npvalue())

abc2 = lp.batch([1,2,3])
print(abc2.npvalue())

print(np.hstack([a,b,c]))


# use pick and pickneglogsoftmax in batch mode
# (must be used in conjunction with lookup_batch):
print("\nPick")
W = dy.parameter( m.add_parameters((5, 10)) )
h = W * lp.batch([1,2,3])
print(h.npvalue())
print(dy.pick_batch(h,[1,2,3]).npvalue())
print(dy.pick(W*lp[1],1).value(), dy.pick(W*lp[2],2).value(), dy.pick(W*lp[3],3).value())

# using pickneglogsoftmax_batch
print("\nPick neg log softmax")
print((-dy.log(dy.softmax(h))).npvalue())
print(dy.pickneglogsoftmax_batch(h,[1,2,3]).npvalue())
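
The minibatch snippet above assumes that numpy (np), dynet (dy), a parameter collection m, and a lookup parameter lp were already created. A minimal setup consistent with the shapes used above (the vocabulary size of 20 is an assumption):

import numpy as np
import dynet as dy

m = dy.ParameterCollection()
lp = m.add_lookup_parameters((20, 10))  # 20 assumed vocabulary entries, 10 dimensions each
dy.renew_cg()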
Example #53
 def create_network_return_best(self, inputs, dropout=False):
     out = self(inputs, dropout)
     out = dy.softmax(out)
     return np.argmax(out.npvalue(), 0)