Example #1
 def pick_neg_log(self, pred, gold):
     # TODO make this a static function in both classes
     if not isinstance(gold, (int, np.int64)):
         # gold is a full target distribution: cross-entropy against the whole vector
         dy_gold = dynet.inputVector(gold)
         return -dynet.sum_elems(dynet.cmult(dy_gold, dynet.log(pred)))
     # gold is a class index: negative log-likelihood of that entry
     return -dynet.log(dynet.pick(pred, gold))
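A minimal usage sketch of both branches above (hypothetical values, not from the original repository; only the dynet package itself is assumed):

# Hypothetical sketch: the two branches of pick_neg_log, outside any class.
import dynet

dynet.renew_cg()
pred = dynet.softmax(dynet.inputVector([0.2, 1.5, -0.3]))  # predicted distribution

# gold as a class index -> negative log-likelihood of that entry
loss_idx = -dynet.log(dynet.pick(pred, 1))

# gold as a full target distribution -> cross-entropy against the whole vector
gold = [0.0, 1.0, 0.0]
loss_xent = -dynet.sum_elems(dynet.cmult(dynet.inputVector(gold), dynet.log(pred)))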
Example #2
def select_action(tree, policy, choose_max=False, return_prob=False, mode='train'):
    prob, pairs = policy.selection_by_tree(tree, mode)
    if pairs is None:
        if return_prob:
            return None, None, None, None
        else:
            return None, None, None
    with np.errstate(all='raise'):
        try:
            prob_v = prob.npvalue()
            if choose_max:
                idx = np.argmax(prob_v)
            else:
                # if np.random.random() < policy.epsilon:
                #     idx = np.random.randint(len(prob_v))
                #     while prob_v[idx] == 0:
                #         idx = np.random.randint(len(prob_v))
                # else:
                # sample an index from the renormalized probabilities
                idx = np.random.choice(len(prob_v), p=prob_v / np.sum(prob_v))
        except Exception:
            # dump diagnostics for the parameters, the history state and the
            # probabilities, then re-raise: idx is undefined at this point,
            # so falling through would only hide the real numerical error
            for para in policy.model_parameters:
                check_error(para, dy.parameter(policy.model_parameters[para]))
            check_error('history', policy.history.output())
            check_error('pr', prob)
            raise
    action = prob[idx]
    policy.saved_actions[-1].append(action)
    policy.update_history(pairs[idx])
    if return_prob:
        return pairs[idx], prob_v[idx], pairs, prob_v
    return pairs[idx], prob_v[idx], dy.mean_elems(dy.cmult(prob, dy.log(prob)))
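A small standalone sketch of the sampling step above (hypothetical probabilities; only numpy is assumed), showing how np.errstate(all='raise') turns silent floating-point warnings into exceptions:

# Hypothetical sketch of the guarded sampling used in select_action.
import numpy as np

prob_v = np.array([0.2, 0.3, 0.5])
with np.errstate(all='raise'):
    # renormalize and sample an index; invalid values here raise
    # FloatingPointError instead of being silently ignored
    idx = np.random.choice(len(prob_v), p=prob_v / np.sum(prob_v))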
Example #3
 def get_summer(s, size):  # list of values (bidirection) => one value
     if s == "avg":
         return dy.average
     else:
         # mask2 keeps the first size//2 dims of a state, mask keeps the last size//2
         mask = [0.] * (size // 2) + [1.] * (size // 2)
         mask2 = [1.] * (size // 2) + [0.] * (size // 2)
         if s == "fend":
             return lambda x: dy.cmult(dy.inputVector(mask2), x[-1])
         elif s == "bend":
             return lambda x: dy.cmult(dy.inputVector(mask), x[0])
         elif s == "ends":
             return lambda x: (dy.cmult(dy.inputVector(mask2), x[-1])
                               + dy.cmult(dy.inputVector(mask), x[0]))
         else:
             return None
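A hypothetical usage sketch for get_summer (the state values and size are illustrative only):

# Hypothetical sketch: summarizing a list of bidirectional states of size 4.
import dynet as dy

states = [dy.inputVector([float(i), i + 0.5, -float(i), 2.0 * i]) for i in range(4)]

ends = get_summer("ends", size=4)(states)  # first half of states[-1] plus last half of states[0]
avg = get_summer("avg", size=4)(states)    # dy.average over all states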
Example #4
 def __call__(self, input_exp, hidden_exp, mask=None):
     # two kinds of dropouts
     if self.idrop > 0.:
         input_exp = dy.dropout(input_exp, self.idrop)
     input_exp_g = input_exp_t = input_exp
     hidden_exp_g = hidden_exp_t = hidden_exp["H"]
     if self.gdrop > 0.:
         input_exp_g = dy.cmult(input_exp, self.masks[0])
         hidden_exp_g = dy.cmult(hidden_exp_g, self.masks[1])
         input_exp_t = dy.cmult(input_exp, self.masks[2])
         hidden_exp_t = dy.cmult(hidden_exp_t, self.masks[3])
     rzt = dy.affine_transform([
         self.iparams["brz"], self.iparams["x2rz"], input_exp_g,
         self.iparams["h2rz"], hidden_exp_g
     ])
     rzt = dy.logistic(rzt)
     rt, zt = dy.pick_range(rzt, 0, self.n_hidden), dy.pick_range(
         rzt, self.n_hidden, 2 * self.n_hidden)
     h_reset = dy.cmult(rt, hidden_exp_t)
     ht = dy.affine_transform([
         self.iparams["bh"], self.iparams["x2h"], input_exp_t,
         self.iparams["h2h"], h_reset
     ])
     ht = dy.tanh(ht)
     hidden = dy.cmult(zt, hidden_exp["H"]) + dy.cmult(
         (1. - zt), ht)  # note: the first term uses the original (un-dropped) hidden state
     # mask: positions where mask == 0 simply pass the previous hidden state through
     if mask is not None:
         mask_array = np.asarray(mask).reshape((1, -1))
         m1 = dy.inputTensor(mask_array, True)  # 1.0 for real words
         m0 = dy.inputTensor(1.0 - mask_array,
                             True)  # 1.0 for padding words (mask=0)
         hidden = hidden * m1 + hidden_exp["H"] * m0
     return {"H": hidden}
Example #5
    def attend(self, encoded_inputs, h_t, input_masks=None):
        # encoded_inputs dimension is: seq len x 2*h x batch size, h_t dimension is h x batch size (for bilstm encoder)
        if len(encoded_inputs) == 1:
            # no need to attend if only one input state, compute output directly
            h_output = dn.tanh(self.w_c *
                               dn.concatenate([h_t, encoded_inputs[0]]))
            # return trivial alphas (all 1's since one input gets all attention)
            if input_masks:
                # if batching
                alphas = dn.inputTensor([1] * len(input_masks[0]),
                                        batched=True)
            else:
                alphas = dn.inputTensor([1], batched=True)
            return h_output, alphas

        # iterate through input states to compute attention scores
        # scores = [v_a * dn.tanh(w_a * h_t + u_a * h_input) for h_input in blstm_outputs]
        w_a_h_t = self.w_a * h_t
        scores = [
            self.v_a *
            dn.tanh(dn.affine_transform([w_a_h_t, self.u_a, h_input]))
            for h_input in encoded_inputs
        ]

        concatenated = dn.concatenate(scores)
        if input_masks:
            # if batching, multiply attention scores with input masks to zero-out scores for padded inputs
            dn_masks = dn.inputTensor(input_masks, batched=True)
            concatenated = dn.cmult(concatenated, dn_masks)

        # normalize scores
        alphas = dn.softmax(concatenated)

        # compute context vector with weighted sum for each seq in batch
        bo = dn.concatenate_cols(encoded_inputs)
        c = bo * alphas
        # c = dn.esum([h_input * dn.pick(alphas, j) for j, h_input in enumerate(blstm_outputs)])

        # compute output vector using current decoder state and context vector
        h_output = dn.tanh(self.w_c * dn.concatenate([h_t, c]))

        return h_output, alphas
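A small standalone sketch of the unmasked context-vector computation above (hypothetical encoder states and scores):

import dynet as dn

encoded = [dn.inputVector([1., 0.]), dn.inputVector([0., 1.]), dn.inputVector([1., 1.])]
scores = dn.inputVector([2.0, 0.5, 1.0])
alphas = dn.softmax(scores)                       # attention weights over the 3 positions
context = dn.concatenate_cols(encoded) * alphas   # weighted sum of encoder states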
Example #6
    def __call__(self, x_embs):
        x_len = len(x_embs)

        # BiGRU
        hf = dy.concatenate_cols(
            self.fGRUBuilder.initial_state().transduce(x_embs))
        hb = dy.concatenate_cols(self.bGRUBuilder.initial_state().transduce(
            x_embs[::-1])[::-1])
        h = dy.concatenate([hf, hb])

        # Sentence vector s: first backward state and last forward state
        hb_1 = dy.pick(hb, index=0, dim=1)
        hf_n = dy.pick(hf, index=x_len - 1, dim=1)
        s = dy.concatenate([hb_1, hf_n])

        # Selective gate: a sigmoid gate over every column of h, applied elementwise
        sGate = dy.logistic(dy.colwise_add(self.Ws * h, self.Us * s + self.bs))
        hp = dy.cmult(h, sGate)

        return hp, hb_1
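A minimal sketch of the selective-gate pattern above; the scalar 0.1 stands in for the learned projection Ws, and s_proj stands in for Us * s + bs (all values are illustrative):

import dynet as dy

h = dy.inputTensor([[1., 2., 3.], [4., 5., 6.]])       # hidden_dim x seq_len
s_proj = dy.inputVector([0.5, -0.5])                   # stand-in for Us * s + bs
sGate = dy.logistic(dy.colwise_add(h * 0.1, s_proj))   # one gate value per cell
hp = dy.cmult(h, sGate)                                # gated states, same shape as h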
Example #7
    def __call__(self, x, tm1s=None, test=False):
        if test:
            # Initial states
            s_tm1 = tm1s[0]
            c_tm1 = tm1s[1]
            w_tm1 = x

            # GRU
            s_t = self.GRUBuilder.initial_state().set_s([s_tm1]).add_input(
                dy.concatenate([w_tm1, c_tm1])).output()

            # Attention
            e_t = dy.pick(
                self.va *
                dy.tanh(dy.colwise_add(self.Ua * self.hp, self.Wa * s_tm1)), 0)
            a_t = dy.softmax(e_t)
            c_t = dy.esum([
                dy.cmult(a_t_i, h_i)
                for a_t_i, h_i in zip(a_t, dy.transpose(self.hp))
            ])
            #c_t = self.hp*a_t # memory error?

            # Output
            r_t = dy.concatenate_cols([
                Wr_j * w_tm1 + Ur_j * c_t + Vr_j * s_t
                for Wr_j, Ur_j, Vr_j in zip(self.Wr, self.Ur, self.Vr)
            ])  # Maxout
            m_t = dy.max_dim(r_t, d=1)
            y_t = dy.softmax(self.Wo * m_t)

            return s_t, c_t, y_t

        else:
            w_embs = x
            # Initial states
            s_tm1 = self.s_0
            c_tm1 = self.c_0
            GRU = self.GRUBuilder.initial_state().set_s([s_tm1])

            y = []
            for w_tm1 in w_embs:
                # GRU
                GRU = GRU.add_input(dy.concatenate([w_tm1, c_tm1]))
                s_t = GRU.output()

                # Attention
                e_t = dy.pick(
                    self.va * dy.tanh(
                        dy.colwise_add(self.Ua * self.hp, self.Wa * s_tm1)), 0)
                a_t = dy.softmax(e_t)
                c_t = dy.esum([
                    dy.cmult(a_t_i, h_i)
                    for a_t_i, h_i in zip(a_t, dy.transpose(self.hp))
                ])
                #c_t = self.hp*a_t # memory error?

                # Output
                r_t = dy.concatenate_cols([
                    Wr_j * w_tm1 + Ur_j * c_t + Vr_j * s_t
                    for Wr_j, Ur_j, Vr_j in zip(self.Wr, self.Ur, self.Vr)
                ])  # Maxout
                m_t = dy.max_dim(r_t, d=1)

                y_t = self.Wo * m_t
                y.append(y_t)

                # t -> tm1
                s_tm1 = s_t
                c_tm1 = c_t

            return y
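A tiny sketch of the maxout step above: dy.max_dim along d=1 keeps, for each output row of r_t, the maximum over the maxout pieces (illustrative numbers):

import dynet as dy

r = dy.inputTensor([[1., 4.], [3., 2.]])  # 2 output dims x 2 maxout pieces
m = dy.max_dim(r, d=1)                    # per-row maximum -> [4., 3.]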
Example #8
 def cosine(self, e1, e2):
     # cosine similarity: dot(e1, e2) / (||e1|| * ||e2||)
     return dynet.cdiv(
         dynet.dot_product(e1, e2),
         dynet.cmult(dynet.sqrt(dynet.squared_norm(e1)),
                     dynet.sqrt(dynet.squared_norm(e2))))
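A hypothetical sanity check of the cosine above, assuming the method is exposed on some model object m (m and the vectors are illustrative only):

import dynet

dynet.renew_cg()
e1 = dynet.inputVector([1., 0., 1.])
e2 = dynet.inputVector([0.5, 0.5, 0.5])
sim = m.cosine(e1, e2)   # m is a hypothetical object exposing the method above
print(sim.value())       # roughly 0.816 for these two vectors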
Example #9
 def pick_neg_log(self, pred, gold):
     if hasattr(gold, "__len__"):
         # calculate cross-entropy loss against the whole vector
         dy_gold = dynet.inputVector(gold)
         return -dynet.sum_elems(dynet.cmult(dy_gold, dynet.log(pred)))
     return -dynet.log(dynet.pick(pred, gold))
Example #10
 def pick_neg_log(self, pred, gold):
     if not isinstance(gold, int):
         # calculate cross-entropy loss against the whole vector
         dy_gold = dynet.inputVector(gold)
         return -dynet.sum_elems(dynet.cmult(dy_gold, dynet.log(pred)))
     return -dynet.log(dynet.pick(pred, gold))