Example #1
 def score(self, features, axis):
     """
     Calculate score for each label
     :param features: extracted feature values, of size input_size
     :param axis: axis of the label we are predicting
     :return: array with score for each label
     """
     super().score(features, axis)
     num_labels = self.num_labels[axis]
     if self.updates > 0 and num_labels > 1:
         if dynet_config.gpu():
             # RestrictedLogSoftmax is not implemented for GPU, so we move the value to CPU first
             value = dy.to_device(self.evaluate(features, axis), 'CPU')
             # then, we move it back to GPU (if the device name is '', the default device will be selected)
             value = dy.to_device(
                 dy.log_softmax(value, restrict=list(range(num_labels))),
                 '').npvalue()
         else:
             value = dy.log_softmax(self.evaluate(features, axis),
                                    restrict=list(
                                        range(num_labels))).npvalue()
         return value[:num_labels]
     self.config.print("  no updates done yet, returning zero vector.",
                       level=4)
     return np.zeros(num_labels)
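For reference, a minimal standalone sketch (not from the project above; values are illustrative) of what the restrict argument does on CPU: the log-softmax is normalized only over the listed label indices, and the remaining entries are masked out.

import dynet as dy
import numpy as np

dy.renew_cg()
scores = dy.inputVector([1.0, 2.0, 3.0, 4.0])
# Normalize over the first three labels only; index 3 is excluded from the support.
log_probs = dy.log_softmax(scores, restrict=list(range(3))).npvalue()
print(np.exp(log_probs[:3]).sum())  # ~1.0: all probability mass stays on the restricted labels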
Example #2
 def rec(index):
     if (words[index] == -1):
         # branch node
         (l_loss, l_hidden) = rec(lchs[index])
         (r_loss, r_hidden) = rec(rchs[index])
         # i_gate = dy.logistic(U0i * l_hidden + U1i * r_hidden + bbi)
         # fl_gate = dy.logistic(U00f * l_hidden + U01f * r_hidden + bbf)
         # fr_gate = dy.logistic(U10f * l_hidden + U11f * r_hidden + bbf)
         # o_gate = dy.logistic(U0o * l_hidden + U1o * r_hidden + bbo)
         hidden = dy.tanh(U0u * l_hidden + U1u * r_hidden + bbu)
         # cell = dy.cmult(i_gate, u_value) + dy.cmult(fl_gate, l_cell) + dy.cmult(fr_gate, r_cell)
         # hidden = dy.cmult(o_gate, dy.tanh(cell))
         pred1 = dy.log_softmax(Why * hidden + by)
         loss = l_loss + r_loss - pred1[int(scores[index])]
         return (loss, hidden)
     else:
         embedding_tensor = dy.inputTensor(word_embedding[words[index]])
         # i_gate = dy.logistic(Wi * embedding_tensor + bi)
         # o_gate = dy.logistic(Wo * embedding_tensor + bo)
         hidden = dy.tanh(Wu * embedding_tensor + bu)
         # cell = dy.cmult(i_gate, u_value)
         # hidden = dy.cmult(o_gate, dy.tanh(cell))
         pred1 = dy.log_softmax(Why * hidden + by)
         loss = -pred1[int(scores[index])]
         return (loss, hidden)
Example #3
    def compute_logits(self, input):
        W_type = dy.parameter(self.p_W_type)
        W_beaker_from = dy.parameter(self.p_W_beaker_from)
        W_beaker_to = dy.parameter(self.p_W_beaker_to)
        W_amount = dy.parameter(self.p_W_amount)

        type_logits = dy.log_softmax(W_type * input)
        beaker_from_logits = dy.log_softmax(W_beaker_from * input)
        beaker_to_logits = dy.log_softmax(W_beaker_to * input)
        amount_logits = dy.log_softmax(W_amount * input)
        return type_logits, beaker_from_logits, beaker_to_logits, amount_logits
Example #4
    def compute_logits(self, input):
        W_type = dy.parameter(self.p_W_type)
        W_first_ix = dy.parameter(self.p_W_first_ix)
        W_second_ix = dy.parameter(self.p_W_second_ix)
        W_shape = dy.parameter(self.p_W_shape)

        type_logits = dy.log_softmax(W_type * input)
        first_ix_logits = dy.log_softmax(W_first_ix * input)
        second_ix_logits = dy.log_softmax(W_second_ix * input)
        shape_logits = dy.log_softmax(W_shape * input)
        return type_logits, first_ix_logits, second_ix_logits, shape_logits
Example #5
    def compute_logits(self, input):
        W_type = dy.parameter(self.p_W_type)
        W_from = dy.parameter(self.p_W_from)
        W_to = dy.parameter(self.p_W_to)
        W_shirt = dy.parameter(self.p_W_shirt)
        # W_hat = dy.parameter(self.p_W_hat)

        type_logits = dy.log_softmax(W_type * input)
        from_logits = dy.log_softmax(W_from * input)
        to_logits = dy.log_softmax(W_to * input)
        shirt_logits = dy.log_softmax(W_shirt * input)
        # hat_logits = dy.log_softmax(W_hat * input)
        return type_logits, from_logits, to_logits, shirt_logits
Example #6
    def compute_logits(self, input):
        W_type = dy.parameter(self.p_W_type)
        W_first_ix = dy.parameter(self.p_W_first_ix)
        W_second_ix = dy.parameter(self.p_W_second_ix)
        W_shape = dy.parameter(self.p_W_shape)

        type_logits = dy.log_softmax(W_type * input[:self.p_dim])
        first_ix_logits = dy.log_softmax(W_first_ix *
                                         input[self.p_dim:2 * self.p_dim])
        second_ix_logits = dy.log_softmax(W_second_ix *
                                          input[2 * self.p_dim:3 * self.p_dim])
        shape_logits = dy.log_softmax(W_shape * input[3 * self.p_dim:])
        return type_logits, first_ix_logits, second_ix_logits, shape_logits
Example #7
    def compute_logits(self, input):
        W_type = dy.parameter(self.p_W_type)
        W_beaker_from = dy.parameter(self.p_W_beaker_from)
        W_beaker_to = dy.parameter(self.p_W_beaker_to)
        W_amount = dy.parameter(self.p_W_amount)

        p_dim = self.input_dim // 4

        type_logits = dy.log_softmax(W_type * input[:p_dim])
        beaker_from_logits = dy.log_softmax(W_beaker_from * input[p_dim:2*p_dim])
        beaker_to_logits = dy.log_softmax(W_beaker_to * input[2*p_dim:3*p_dim])
        amount_logits = dy.log_softmax(W_amount * input[3*p_dim:])

        return type_logits, beaker_from_logits, beaker_to_logits, amount_logits
Example #8
    def __call__(self, sent1, sent2):
        """
        :param sent1: np matrix.
        :param sent2: np matrix.
        :return: np array of 3 elements.
        """
        sent1_linear, sent2_linear = self.apply_linear_embed(sent1, sent2)
        f1, f2 = self.apply_f(sent1_linear, sent2_linear)

        score1 = f1 * dy.transpose(f2)
        prob1 = dy.softmax(score1)
        score2 = dy.transpose(score1)
        prob2 = dy.softmax(score2)

        sent1_combine = dy.concatenate_cols(
            [sent1_linear, prob1 * sent2_linear])
        sent2_combine = dy.concatenate_cols(
            [sent2_linear, prob2 * sent1_linear])

        # sum
        g1, g2 = self.apply_g(sent1_combine, sent2_combine)
        sent1_output = dy.sum_dim(g1, [0])
        sent2_output = dy.sum_dim(g2, [0])

        input_combine = dy.transpose(
            dy.concatenate([sent1_output, sent2_output]))
        h = self.apply_h(input_combine)

        linear_final = dy.parameter(self.linear_final)
        h = h * linear_final

        output = dy.log_softmax(dy.transpose(h))
        return output
Example #9
def log_softmax_costs(logits, costs=None, valid_actions=None):
    """Compute log softmax-margin with arbitrary costs."""
    if costs is not None:
        # each action gets a cost, the higher the overall score the better.
        # Typically, when adding `costs`, no `valid_actions` are passed.
        logits += dy.inputVector(costs)
    return dy.log_softmax(logits, restrict=valid_actions)
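A hypothetical call to the helper above (logits, costs, and the gold index are invented) illustrating the softmax-margin idea: each non-gold action has its cost added to its score before normalization, and the loss is the negative log-probability of the gold action.

import dynet as dy

dy.renew_cg()
logits = dy.inputVector([0.5, 1.5, 0.2])  # scores for three actions
costs = [1.0, 0.0, 1.0]                   # the gold action (index 1) has zero cost
augmented = log_softmax_costs(logits, costs=costs)
loss = -dy.pick(augmented, 1)             # softmax-margin loss for the gold action
print(loss.value())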
Example #10
def generate(sent):
    dy.renew_cg()

    src = sent


    #initialize the LSTM
    init_state_src = LSTM_SRC_BUILDER.initial_state()

    #get the output of the first LSTM
    src_output = init_state_src.add_inputs([LOOKUP_SRC[x] for x in src])[-1].output()

    #generate until an eos tag or the maximum length is reached
    current_state = LSTM_TRG_BUILDER.initial_state().set_s([src_output, dy.tanh(src_output)])

    prev_word = sos_trg
    trg_sent = []
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    for i in range(MAX_SENT_SIZE):
        #feed the previous word into the lstm, calculate the most likely word, add it to the sentence
        current_state = current_state.add_input(LOOKUP_TRG[prev_word])
        output_embedding = current_state.output()
        s = dy.affine_transform([b_sm, W_sm, output_embedding])
        log_probs = dy.log_softmax(s).value()
        next_word = np.argmax(log_probs)

        if next_word == eos_trg:
            break
        prev_word = next_word
        trg_sent.append(i2w_trg[next_word])
    return trg_sent
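A standalone version of the decoding step above, with invented parameter shapes, showing that the argmax is taken directly over the log-probabilities of the softmax layer:

import dynet as dy
import numpy as np

dy.renew_cg()
model = dy.ParameterCollection()
W_sm_p = model.add_parameters((5, 8))  # 5 target words, hidden size 8 (illustrative)
b_sm_p = model.add_parameters((5,))

output_embedding = dy.inputTensor(np.random.rand(8))
s = dy.affine_transform([dy.parameter(b_sm_p), dy.parameter(W_sm_p), output_embedding])
log_probs = dy.log_softmax(s).value()
next_word = int(np.argmax(log_probs))  # id of the most likely next word
print(next_word)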
Example #11
 def sample_action(self,
                   state,
                   argmax=False,
                   sample_pp=None,
                   predefined_actions=None):
     policy = dy.log_softmax(self.policy_network(state))
     actions = []
     if predefined_actions is not None:
         # Use defined action value
         self.sampling_action = self.SamplingAction.PREDEFINED
         actions.extend(predefined_actions)
     else:
         # sample from policy
         for k in range(self.sample):
             sample = self.sample_from_policy(policy, argmax=argmax)
             if sample_pp is not None:
                 sample = sample_pp(sample)
             actions.append(sample)
             # only one sample during argmax
             if argmax:
                 break
     try:
         return actions
     finally:
         self.policy_lls.append(policy)
         self.actions.append(actions)
         self.states.append(state)
Example #12
 def generate(self, context, trg, decorate=False, maxpossible=100):
     #greedy generation!
     prev_out=dy.zeros((self.hdim))
     outputs=[]
     for _ in range(maxpossible):
         emb=dy.concatenate([context, prev_out])
         Ui,Uo,Uu = [dy.parameter(u) for u in self.US]
         Uf1= dy.parameter(self.UFS[0])
         bi,bo,bu,bf = [dy.parameter(b) for b in self.BS]
         #import pdb;pdb.set_trace()
         i = dy.logistic(bi+Ui*emb)
         o = dy.logistic(bo+Uo*emb)
         f = dy.logistic(bf+Uf1*emb)
         #print("hey")
         u = dy.tanh(bu+Uu*emb)
         c = dy.cmult(i,u) + dy.cmult(f,prev_out)
         h = dy.cmult(o,dy.tanh(c))
         if decorate: tree._e = h
         prev_out=c
         #pre1=dy.parameter(self.pre_l)
         pre2=dy.parameter(self.pred)
         out=dy.log_softmax(pre2*h)
         out=np.argmax(out.npvalue())
         outputs.append(out)
         if out==1:
             print(outputs)
             print("-----")
             print(trg)
             return outputs
     print(outputs)
     print("---")
     print(trg)
     return outputs
Example #13
    def decode_to_prediction(self, encoded, max_length):

        w = dy.parameter(self.w_softmax)
        b = dy.parameter(self.b_softmax)
        w1 = dy.parameter(self.attention_source)
        encoded_states = dy.concatenate_cols(encoded)

        attentional_component = w1 * encoded_states

        prev_output_embeddings = self.target_lookup[self.eos_target]
        current_state = self.decoder.initial_state().add_input(
            dy.concatenate(
                [dy.vecInput(self.hidden_size * 2), prev_output_embeddings]))

        result = ""
        for i in range(max_length):
            vector = dy.concatenate([
                self.attention(encoded_states, current_state,
                               attentional_component), prev_output_embeddings
            ])

            current_state = current_state.add_input(vector)
            s = dy.affine_transform([b, w, current_state.output()])
            probs = (dy.log_softmax(s)).value()
            next_word = np.argmax(probs)
            prev_output_embeddings = self.target_lookup[next_word]

            if (next_word == self.eos_target):
                return result[:-1]
            if next_word in self.targetDictionnary.keys():
                result += self.targetDictionnary[next_word] + " "
            else:
                result += self.targetDictionnary[unk_target] + " "
        return result[:-1]
Example #14
  def sample_segmentation(self, encodings, batch_size):
    lmbd = self.lmbd.value() if self.lmbd is not None else 0
    eps = self.eps.value() if self.eps is not None else None
    segment_logsoftmaxes = [dy.log_softmax(self.segment_transform(fb)) for fb in encodings]
    # Flags
    is_presegment_provided = len(self.src_sent) != 0 and hasattr(self.src_sent[0], "annotation")
    is_warmup = lmbd == 0 or self.is_segmentation_warmup()
    is_epsgreedy_triggered = eps is not None and numpy.random.random() <= eps
    # Sample based on the criterion
    if self.learn_segmentation and not is_warmup and not self.train:
      # During testing always sample from softmax if it is not warmup
      segment_decisions = self.sample_from_softmax(encodings, batch_size, segment_logsoftmaxes)
    elif is_presegment_provided:
      segment_decisions = self.sample_from_prior(encodings, batch_size)
    elif is_warmup or is_epsgreedy_triggered:
      segment_decisions = self.sample_from_poisson(encodings, batch_size)
    else:
      segment_decisions = self.sample_from_softmax(encodings, batch_size, segment_logsoftmaxes)
    segment_decisions = segment_decisions.transpose()
    # The last segment decision of an active component should be equal to 1
    if encodings.mask is not None:
      src = self.src_sent
      mask = [numpy.nonzero(m)[0] for m in encodings.mask.np_arr.transpose()]
      assert len(segment_decisions) == len(mask), \
        "Len(seg)={}, Len(mask)={}".format(len(segment_decisions), len(mask))
      for i in range(len(segment_decisions)):
        if len(mask[i]) != 0:
          segment_decisions[i-1][mask[i]] = 1
    segment_decisions[-1][:] = 1

    return segment_decisions, segment_logsoftmaxes
Example #15
    def generate_output(self,
                        decoder,
                        attender,
                        output_embedder,
                        dec_state,
                        src_length=None,
                        forced_trg_ids=None):
        score = 0.0
        word_ids = []
        attentions = []

        while (word_ids == []
               or word_ids[-1] != Vocab.ES) and len(word_ids) < self.max_len:
            if len(word_ids) > 0:  # don't feed in the initial start-of-sentence token
                dec_state = decoder.add_input(
                    dec_state,
                    output_embedder.embed(
                        word_ids[-1] if forced_trg_ids is None else
                        forced_trg_ids[len(word_ids) - 1]))
            dec_state.context = attender.calc_context(
                dec_state.rnn_state.output())
            logsoftmax = dy.log_softmax(
                decoder.get_scores(dec_state)).npvalue()
            if forced_trg_ids is None:
                cur_id = np.argmax(logsoftmax)
            else:
                cur_id = forced_trg_ids[len(word_ids)]

            score += logsoftmax[cur_id]
            word_ids.append(cur_id)
            attentions.append(attender.get_last_attention())

        return SearchOutput(word_ids, attentions), score
Example #16
    def predict(self, batch_dict):
        dy.renew_cg()
        inputs = self.make_input(batch_dict)
        lengths = inputs['lengths']
        unaries = self.compute_unaries(inputs)
        if self.do_crf is True:
            best_path, path_score = self.crf.decode(unaries)
        elif self.constraint is not None:
            best_path, path_score = viterbi(
                unaries,
                dy.log_softmax(dy.inputTensor(self.constraint[1] * -1e4)),
                Offsets.GO,
                Offsets.EOS,
                norm=True)
        else:
            best_path = [np.argmax(x.npvalue(), axis=0) for x in unaries]
        # TODO: RN using autobatching, so none of this is really useful
        # If we want to support batching in this function we have to either loop over the batch
        # or we can just simplify all this code here
        best_path = np.stack(best_path).reshape(-1, 1)  # (T, B)

        best_path = best_path.transpose(1, 0)
        results = []

        for b in range(best_path.shape[0]):
            sentence = best_path[b, :lengths[b]]
            results.append(sentence)
        return results
Example #17
    def predict(self, batch_dict):
        dy.renew_cg()
        inputs = self.make_input(batch_dict)
        lengths = inputs['lengths']
        unaries = self.compute_unaries(inputs)
        if self.do_crf is True:
            best_path, path_score = self.crf.decode(unaries)
        elif self.constraint is not None:
            best_path, path_score = viterbi(
                unaries,
                dy.log_softmax(dy.inputTensor(self.constraint[1] * -1e4)),
                Offsets.GO, Offsets.EOS,
                norm=True
            )
        else:
            best_path = [np.argmax(x.npvalue(), axis=0) for x in unaries]
        # TODO: RN using autobatching, so none of this is really useful
        # If we want to support batching in this function we have to either loop over the batch
        # or we can just simplify all this code here
        best_path = np.stack(best_path).reshape(-1, 1)  # (T, B)

        best_path = best_path.transpose(1, 0)
        results = []

        for b in range(best_path.shape[0]):
            sentence = best_path[b, :lengths[b]]
            results.append(sentence)
        return results
Example #18
def viterbi(emissions, transition, start_idx, end_idx, norm=False):
    n_tags = emissions[0].dim()[0][0]
    backpointers = []

    inits = [-1e4] * n_tags
    inits[start_idx] = 0
    alphas = dy.inputVector(inits)
    alphas = dy.log_softmax(alphas) if norm else alphas

    for emission in emissions:
        next_vars = dy.colwise_add(dy.transpose(transition), alphas)
        best_tags = np.argmax(next_vars.npvalue(), 0)
        v_t = dy.max_dim(next_vars, 0)
        alphas = v_t + emission
        backpointers.append(best_tags)

    terminal_expr = alphas + dy.pick(transition, end_idx)
    best_tag = np.argmax(terminal_expr.npvalue())
    path_score = dy.pick(terminal_expr, best_tag)

    best_path = [best_tag]
    for bp_t in reversed(backpointers):
        best_tag = bp_t[best_tag]
        best_path.append(best_tag)
    _ = best_path.pop()
    best_path.reverse()
    return best_path, path_score
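An illustrative call to the function above (tag count, indices, and scores are all invented); with norm=True the initial alphas are normalized through dy.log_softmax before the forward pass:

import dynet as dy
import numpy as np

dy.renew_cg()
n_tags, start_idx, end_idx = 4, 2, 3
emissions = [dy.inputTensor(np.random.rand(n_tags)) for _ in range(3)]  # 3 time steps
transition = dy.inputTensor(np.random.rand(n_tags, n_tags))             # transition scores
best_path, path_score = viterbi(emissions, transition, start_idx, end_idx, norm=True)
print(best_path, path_score.value())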
Example #19
    def train(self, words, lemmas, gold, bad):
        dy.renew_cg()
        W = dy.parameter(self.pW)
        b = dy.parameter(self.pb)

        losses = []
        gold_scores = []
        bad_scores = []

        for item in gold:
            lf, denotation = item[0], item[1]
            feature = self.extract_feature(words, lemmas, lf, denotation)
            feature_vec = dy.vecInput(self.nfeatures)
            feature_vec.set(feature)
            gold_scores.append(W * feature_vec + b)

        for item in bad:
            lf, denotation = item[0], item[1]
            feature = self.extract_feature(words, lemmas, lf, denotation)
            feature_vec = dy.vecInput(self.nfeatures)
            feature_vec.set(feature)
            bad_scores.append(W * feature_vec + b)

        log_prob = dy.log_softmax(dy.concatenate(gold_scores + bad_scores))
        for i in range(len(gold_scores)):
            losses.append(dy.pick(log_prob, i))

        return -dy.esum(losses)
Example #20
 def compute_output_log_probs(self, inputs, possible_actions, state=None, past_states=None, past_actions=None):
     assert state is not None
     W_context_action = dy.parameter(self.p_W_context_action)
     W_action = dy.parameter(self.p_W_action)
     unconstrained, support = self._log_probs_unconstrained_unnormed(inputs, possible_actions)
     unconstrained += action_in_state_context_bonuses(self.corpus, state, inputs, W_context_action, W_action, self.predict_invalid, past_states, past_actions)
     return dy.log_softmax(unconstrained, support)
Example #21
    def calc_loss(
            self, x: dy.Expression,
            y: Union[numbers.Integral,
                     List[numbers.Integral]]) -> dy.Expression:

        scores = self.calc_scores(x)

        if self.label_smoothing == 0.0:
            # single mode
            if not batchers.is_batched(y):
                loss = dy.pickneglogsoftmax(scores, y)
            # minibatch mode
            else:
                loss = dy.pickneglogsoftmax_batch(scores, y)
        else:
            log_prob = dy.log_softmax(scores)
            if not batchers.is_batched(y):
                pre_loss = -dy.pick(log_prob, y)
            else:
                pre_loss = -dy.pick_batch(log_prob, y)

            ls_loss = -dy.mean_elems(log_prob)
            loss = ((1 - self.label_smoothing) *
                    pre_loss) + (self.label_smoothing * ls_loss)

        return loss
Example #22
    def calc_loss(self, mlp_dec_state, ref_action):
        """
        Label Smoothing is implemented with reference to Section 7 of the paper
        "Rethinking the Inception Architecture for Computer Vision"
        (https://arxiv.org/pdf/1512.00567.pdf)
        """
        scores = self.get_scores(mlp_dec_state)

        if self.label_smoothing == 0.0:
            # single mode
            if not xnmt.batcher.is_batched(ref_action):
                return dy.pickneglogsoftmax(scores, ref_action)
            # minibatch mode
            else:
                return dy.pickneglogsoftmax_batch(scores, ref_action)

        else:
            log_prob = dy.log_softmax(scores)
            if not xnmt.batcher.is_batched(ref_action):
                pre_loss = -dy.pick(log_prob, ref_action)
            else:
                pre_loss = -dy.pick_batch(log_prob, ref_action)

            ls_loss = -dy.mean_elems(log_prob)
            loss = ((1 - self.label_smoothing) *
                    pre_loss) + (self.label_smoothing * ls_loss)
            return loss
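For clarity, the label-smoothing arithmetic above in isolation (label count, scores, and the smoothing weight are illustrative): the usual negative log-likelihood is mixed with a term that pushes a fraction of the probability mass toward the uniform distribution.

import dynet as dy

dy.renew_cg()
label_smoothing = 0.1
scores = dy.inputVector([2.0, 0.5, -1.0, 0.3])  # unnormalized scores over 4 labels
ref_action = 0

log_prob = dy.log_softmax(scores)
pre_loss = -dy.pick(log_prob, ref_action)  # standard negative log-likelihood term
ls_loss = -dy.mean_elems(log_prob)         # uniform-smoothing term
loss = (1 - label_smoothing) * pre_loss + label_smoothing * ls_loss
print(loss.value())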
Example #23
def generate(sent):
    dy.renew_cg()

    src = sent

    # initialize the LSTM
    init_state_src = LSTM_SRC_BUILDER.initial_state()

    # get the output of the first LSTM
    src_output = init_state_src.add_inputs([LOOKUP_SRC[x]
                                            for x in src])[-1].output()

    # generate until an eos tag or the maximum length is reached
    current_state = LSTM_TRG_BUILDER.initial_state().set_s(
        [src_output, dy.tanh(src_output)])

    prev_word = sos_trg
    trg_sent = []
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    for i in range(MAX_SENT_SIZE):
        # feed the previous word into the lstm, calculate the most likely word, add it to the sentence
        current_state = current_state.add_input(LOOKUP_TRG[prev_word])
        output_embedding = current_state.output()
        s = dy.affine_transform([b_sm, W_sm, output_embedding])
        log_probs = dy.log_softmax(s).value()
        next_word = np.argmax(log_probs)

        if next_word == eos_trg:
            break
        prev_word = next_word
        trg_sent.append(i2w_trg[next_word])
    return trg_sent
Example #24
def get_constit_loss(fws, bws, goldspans):
    if not USE_PTB_CONSTITS:
        raise Exception("should not be using the constit loss now!",
                        USE_PTB_CONSTITS)

    if len(goldspans) == 0:
        return None, 0

    losses = []
    sentlen = len(fws)

    for j in range(sentlen):
        istart = 0
        if USE_SPAN_CLIP and j > ALLOWED_SPANLEN:
            istart = max(0, j - ALLOWED_SPANLEN)
        for i in range(istart, j + 1):
            constit_ij = w_c * dy.rectify(
                w_fb * dy.concatenate([fws[i][j], bws[i][j]]) + b_fb) + b_c
            logloss = dy.log_softmax(constit_ij)

            isconstit = int((i, j) in goldspans)
            losses.append(dy.pick(logloss, isconstit))

    ptbconstitloss = dy.scalarInput(DELTA) * -dy.esum(losses)
    numspanstagged = len(losses)
    return ptbconstitloss, numspanstagged
Example #25
 def cross_entropy_loss(self, scores, next_words):
     if self.label_smoothing:
         log_softmax = dy.log_softmax(scores)
         return -dy.pick_batch(log_softmax, next_words) * (1 - self.label_smoothing) \
                - dy.mean_elems(log_softmax) * self.label_smoothing
     else:
         return dy.pickneglogsoftmax_batch(scores, next_words)
Example #26
def viterbi(emissions, transition, start_idx, end_idx, norm=False):
    n_tags = emissions[0].dim()[0][0]
    backpointers = []

    inits = [-1e4] * n_tags
    inits[start_idx] = 0
    alphas = dy.inputVector(inits)
    alphas = dy.log_softmax(alphas) if norm else alphas

    for emission in emissions:
        next_vars = dy.colwise_add(dy.transpose(transition), alphas)
        best_tags = np.argmax(next_vars.npvalue(), 0)
        v_t = dy.max_dim(next_vars, 0)
        alphas = v_t + emission
        backpointers.append(best_tags)

    terminal_expr = alphas + dy.pick(transition, end_idx)
    best_tag = np.argmax(terminal_expr.npvalue())
    path_score = dy.pick(terminal_expr, best_tag)

    best_path = [best_tag]
    for bp_t in reversed(backpointers):
        best_tag = bp_t[best_tag]
        best_path.append(best_tag)
    _ = best_path.pop()
    best_path.reverse()
    return best_path, path_score
Example #27
    def _policy_shape_probs(self,
                            prob_dist):
        # TODO: this is specific to Alchemy
        num_actions = len(self.output_action_vocabulary) - 1
        num_locations = len(self.output_location_vocabulary) - 1
        num_arguments = len(self.output_argument_vocabulary) - 1
        new_probdist = dy.zeros(prob_dist.dim()[0])
        zeroes = numpy.zeros(num_locations * num_arguments)
        ones = numpy.ones(num_locations * num_arguments)
        eos_prob = prob_dist[self._all_output_vocabulary.lookup_index((EOS, NO_ARG, NO_ARG))]
        action_idx = 0
        for action in self.output_action_vocabulary:
            masks = numpy.concatenate(
                        (numpy.repeat(zeroes, action_idx),
                         ones,
                         numpy.repeat(zeroes, num_actions - action_idx - 1)))
            actions_masks = dy.reshape(dy.inputTensor(masks),
                                       (num_actions * num_locations * num_arguments, 1))
            if action == EOS:
                new_probdist += dy.cmult(actions_masks, prob_dist) / 2.
            elif action == "push":
                new_probdist += dy.cmult(actions_masks, prob_dist) + eos_prob / (2. * 56.)
            elif action == "pop":
                new_probdist += dy.cmult(actions_masks, prob_dist)

        if self.args.syntax_restricted:
            return dy.exp(dy.log_softmax(dy.cmult(new_probdist, prob_dist),
                                         restrict = self._valid_action_indices))
        else:
            return dy.softmax(dy.cmult(new_probdist, prob_dist))
Example #28
  def __call__(self, translator, dec_state, src, trg):
    # TODO: apply trg.mask ?
    samples = []
    logsofts = []
    self.bs = []
    done = [False for _ in range(len(trg))]
    for _ in range(self.sample_length):
      dec_state.context = translator.attender.calc_context(dec_state.rnn_state.output())
      if self.use_baseline:
        h_t = dy.tanh(translator.decoder.context_projector(dy.concatenate([dec_state.rnn_state.output(), dec_state.context])))
        self.bs.append(self.baseline(dy.nobackprop(h_t)))
      logsoft = dy.log_softmax(translator.decoder.get_scores(dec_state))
      sample = logsoft.tensor_value().categorical_sample_log_prob().as_numpy()[0]
      # Keep track of previously sampled EOS
      sample = [sample_i if not done_i else Vocab.ES for sample_i, done_i in zip(sample, done)]
      # Appending and feeding in the decoder
      logsoft = dy.pick_batch(logsoft, sample)
      logsofts.append(logsoft)
      samples.append(sample)
      dec_state = translator.decoder.add_input(dec_state, translator.trg_embedder.embed(xnmt.batcher.mark_as_batch(sample)))
      # Check if we are done.
      done = list(six.moves.map(lambda x: x == Vocab.ES, sample))
      if all(done):
        break

    samples = np.stack(samples, axis=1).tolist()
    self.eval_score = []
    for trg_i, sample_i in zip(trg, samples):
      # Removing EOS
      try:
        idx = sample_i.index(Vocab.ES)
        sample_i = sample_i[:idx]
      except ValueError:
        pass
      try:
        idx = trg_i.words.index(Vocab.ES)
        trg_i.words = trg_i.words[:idx]
      except ValueError:
        pass
      # Calculate the evaluation score
      score = 0 if not len(sample_i) else self.evaluation_metric.evaluate_fast(trg_i.words, sample_i)
      self.eval_score.append(score)
    self.true_score = dy.inputTensor(self.eval_score, batched=True)
    loss = LossBuilder()

    if self.use_baseline:
      for i, (score, _) in enumerate(zip(self.bs, logsofts)):
        logsofts[i] = dy.cmult(logsofts[i], score - self.true_score)
      loss.add_loss("Reinforce", dy.sum_elems(dy.esum(logsofts)))

    else:
        loss.add_loss("Reinforce", dy.sum_elems(dy.cmult(-self.true_score, dy.esum(logsofts))))

    if self.use_baseline:
      baseline_loss = []
      for bs in self.bs:
        baseline_loss.append(dy.squared_distance(self.true_score, bs))
      loss.add_loss("Baseline", dy.sum_elems(dy.esum(baseline_loss)))
    return loss
Example #29
    def _encodings_to_label_log_probabilities(self, encodings, lmbd=None):
        label_scores = self.f_label(dy.concatenate_to_batch(encodings))
        label_scores_reshaped = dy.reshape(label_scores, (self.label_vocab.size, len(encodings)))

        if lmbd is not None:
            label_scores_reshaped = dy.cmult(label_scores_reshaped, lmbd)

        return dy.log_softmax(label_scores_reshaped)
Example #30
    def compute_output_log_probs(self, inputs, possible_actions, state=None, past_states=None, past_actions=None):
        assert len(inputs) == 1
        input = inputs[0]
        type_logits, from_logits, to_logits, shirt_logits = self.compute_logits(input)
        support = sorted([self.corpus.ACTIONS_TO_INDEX[ac] for ac in possible_actions])

        unconstrained = self.combine_logits(type_logits, from_logits, to_logits, shirt_logits)

        return dy.log_softmax(unconstrained, support)
Example #31
 def _get_transition(self, stack, buffer, empty_buffer, valid_transitions):
     stack_embedding = stack[-1][0].output()  # the stack is not empty, so we should decide on a transition
     buffer_embedding = buffer[-1][0] if buffer else empty_buffer
     parser_state = dy.concatenate([buffer_embedding, stack_embedding])
     h = dy.rectify(self.s2h * parser_state + self.s2h_b)
     logits = self.h2t * h + self.h2t_b
     logps = dy.log_softmax(logits, valid_transitions)
     return logps, h
Example #32
 def compute_output_log_probs(self,
                              inputs,
                              possible_actions,
                              state=None,
                              past_states=None,
                              past_actions=None):
     unconstrained, support = self._log_probs_unconstrained_unnormed(
         inputs, possible_actions)
     return dy.log_softmax(unconstrained, support)
Example #33
        def __call__(self, query, options, gold, lengths, query_no):
            if len(options) == 1:
                return None, 0

            final = []
            if args.word_vectors:
                qvecs = [dy.lookup(self.pEmbedding, w) for w in query]
                qvec_max = dy.emax(qvecs)
                qvec_mean = dy.average(qvecs)
            for otext, features in options:
                if not args.no_features:
                    inputs = dy.inputTensor(features)
                if args.word_vectors:
                    ovecs = [dy.lookup(self.pEmbedding, w) for w in otext]
                    ovec_max = dy.emax(ovecs)
                    ovec_mean = dy.average(ovecs)
                    if args.no_features:
                        inputs = dy.concatenate(
                            [qvec_max, qvec_mean, ovec_max, ovec_mean])
                    else:
                        inputs = dy.concatenate(
                            [inputs, qvec_max, qvec_mean, ovec_max, ovec_mean])
                if args.drop > 0:
                    inputs = dy.dropout(inputs, args.drop)
                h = inputs
                for pH, pB in zip(self.hidden, self.bias):
                    h = dy.affine_transform([pB, pH, h])
                    if args.nonlin == "linear":
                        pass
                    elif args.nonlin == "tanh":
                        h = dy.tanh(h)
                    elif args.nonlin == "cube":
                        h = dy.cube(h)
                    elif args.nonlin == "logistic":
                        h = dy.logistic(h)
                    elif args.nonlin == "relu":
                        h = dy.rectify(h)
                    elif args.nonlin == "elu":
                        h = dy.elu(h)
                    elif args.nonlin == "selu":
                        h = dy.selu(h)
                    elif args.nonlin == "softsign":
                        h = dy.softsign(h)
                    elif args.nonlin == "swish":
                        h = dy.cmult(h, dy.logistic(h))
                final.append(dy.sum_dim(h, [0]))

            final = dy.concatenate(final)
            nll = -dy.log_softmax(final)
            dense_gold = []
            for i in range(len(options)):
                dense_gold.append(1.0 / len(gold) if i in gold else 0.0)
            answer = dy.inputTensor(dense_gold)
            loss = dy.transpose(answer) * nll
            predicted_link = np.argmax(final.npvalue())

            return loss, predicted_link
Example #34
 def score(self, features, axis):
     """
     Calculate score for each label
     :param features: extracted feature values, of size input_size
     :param axis: axis of the label we are predicting
     :return: array with score for each label
     """
     super().score(features, axis)
     num_labels = self.num_labels[axis]
     if self.updates > 0 and num_labels > 1:
         if dynet_config.gpu():
             # RestrictedLogSoftmax is not implemented for GPU, so we move the value to CPU first
             value = dy.to_device(self.evaluate(features, axis), 'CPU')
             # then, we move it back to GPU (if the device name is '', the default device will be selected)
             value = dy.to_device(dy.log_softmax(value, restrict=list(range(num_labels))), '').npvalue()
         else:
             value = dy.log_softmax(self.evaluate(features, axis), restrict=list(range(num_labels))).npvalue()
         return value[:num_labels]
     self.config.print("  no updates done yet, returning zero vector.", level=4)
     return np.zeros(num_labels)
Example #35
def generate(sent):
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    src = sent

    #get the output of the first LSTM
    src_outputs =  [dy.concatenate([x.output(), y.output()]) for x,y in LSTM_SRC.add_inputs([LOOKUP_SRC[word] for word in src])]

    src_output = src_outputs[-1]

    #gets the parameters for the attention
    src_output_matrix = dy.concatenate_cols(src_outputs)
    w1_att_src = dy.parameter(w1_att_src_p)
    fixed_attentional_component = w1_att_src * src_output_matrix



    #generate until an eos tag or the maximum length is reached
    current_state = LSTM_TRG_BUILDER.initial_state().set_s([src_output, dy.tanh(src_output)])

    prev_word = sos_trg
    trg_sent = []
    attention_matrix = []
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    W_m = dy.parameter(W_m_p)
    b_m = dy.parameter(b_m_p)



    for i in range(MAX_SENT_SIZE):
        #feed the previous word into the lstm, calculate the most likely word, add it to the sentence
        current_state = current_state.add_input(LOOKUP_TRG[prev_word])
        output_embedding = current_state.output()
        att_output, alignment = calc_attention(src_output_matrix, output_embedding, fixed_attentional_component)
        attention_matrix.append(alignment)
        middle_expr = dy.tanh(dy.affine_transform([b_m, W_m, dy.concatenate([output_embedding, att_output])]))
        s = dy.affine_transform([b_sm, W_sm, middle_expr])
        log_probs = dy.log_softmax(s).value()
        next_word = np.argmax(log_probs)

        if next_word == eos_trg:
            break
        prev_word = next_word
        trg_sent.append(i2w_trg[next_word])
    return trg_sent, dy.concatenate_cols(attention_matrix).value()
Example #36
 def calc_loss(self, scores, axis, true, importance):
     ret = [i * dy.pickneglogsoftmax(scores, t) for t, i in zip(true, importance)]
     if self.loss == "max_margin":
         ret.append(dy.max_dim(dy.log_softmax(scores, restrict=list(set(range(self.num_labels[axis])) - set(true)))))
     return ret
Example #37
 def prediction(self, x):
     return [dy.log_softmax(y) for y in self.output(x)]
Example #38
  def parse(self, t, oracle_actions=None):
    dy.renew_cg()
    self.NULL_REP = self.WORDS_LOOKUP[self.nwords-1]
    if oracle_actions:
      oracle_actions = list(oracle_actions)
      oracle_actions.reverse()
    toks = list(t)
    toks.reverse()
    stack = []
    buffer = []
    W1 = dy.parameter(self.pW1)
    b1 = dy.parameter(self.pb1)
    W_act = dy.parameter(self.pW_act)
    b_act = dy.parameter(self.pb_act)
    losses = []
    for tok in toks:
      tok_embedding = self.WORDS_LOOKUP[tok]
      buffer.append(Head(self.vocab.i2w[tok], tok_embedding))

    while not (len(stack) == 1 and len(buffer) == 0):
      # based on parser state, get valid actions
      valid_actions = []
      if len(buffer) > 0:  # can only shift if there are elements in the buffer
        valid_actions += [SHIFT]
      if len(stack) >= 2:  # can only reduce if there are at least two elements on the stack
        valid_actions += [REDUCE_L, REDUCE_R]

      # compute probability of each of the actions and choose an action
      # either from the oracle or if there is no oracle, based on the model
      action = valid_actions[0]
      log_probs = None
      if len(valid_actions) > 1:
        representations = self.extract_features(stack, buffer)
        h = dy.cube(W1*dy.concatenate(representations) + b1)
        logits = W_act * h + b_act
        log_probs = dy.log_softmax(logits, valid_actions)
        if oracle_actions is None:
          action = max(enumerate(log_probs.vec_value()), key=itemgetter(1))[0]
      if oracle_actions is not None:
        action = oracle_actions.pop()
        if log_probs is not None:
          # append the action-specific loss
          losses.append(dy.pick(log_probs, action))

      # execute the action to update the parser state
      if action == SHIFT:
        token = buffer.pop()
        stack.append(token)
      else: # one of the reduce actions
        right = stack.pop()
        left = stack.pop()
        head, modifier = (left, right) if action == REDUCE_R else (right, left)
        #add the tokens and their embeddings into the children list
        if action == REDUCE_R:
          head.add_child(modifier, 'right')
        else:
          head.add_child(modifier, 'left')
        stack.append(head)
        if oracle_actions is None:
          print('{0} --> {1}'.format(head.word, modifier.word))

    # the head of the tree that remains at the top of the stack is now the root
    if oracle_actions is None:
      head = stack.pop().word
      print('ROOT --> {0}'.format(head))
    return -dy.esum(losses) if losses else None
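Finally, a minimal sketch of the restricted softmax used by the parser above (action ids and scores are invented): the distribution is normalized only over the currently valid transitions, and the oracle action's log-probability gives the loss term.

import dynet as dy

dy.renew_cg()
SHIFT, REDUCE_L, REDUCE_R = 0, 1, 2
logits = dy.inputVector([0.2, 1.3, -0.4])
valid_actions = [REDUCE_L, REDUCE_R]       # e.g. the buffer is empty, so SHIFT is illegal
log_probs = dy.log_softmax(logits, valid_actions)
oracle_action = REDUCE_R
loss = -dy.pick(log_probs, oracle_action)  # negative log-likelihood over valid actions only
print(loss.value())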