Code example #1
File: model.py Project: nlpaueb/aueb-bioasq6
    def GetQDScore(self, qwords, qreps, dwords, dreps, extra):
        nq = len(qreps)
        nd = len(dreps)
        qgl = [
            self.W_gate.expr() *
            dy.concatenate([qv, dy.constant(1, self.idf_val(qw))])
            for qv, qw in zip(qreps, qwords)
        ]
        qgates = dy.softmax(dy.concatenate(qgl))

        qscores = []
        for qtok in range(len(qreps)):
            qrep = qreps[qtok]
            att_scores = [dy.dot_product(qrep, drep) for drep in dreps]
            att_probs = dy.softmax(dy.concatenate(att_scores))
            doc_rep = dy.esum([v * p for p, v in zip(att_probs, dreps)])
            input_vec = dy.cmult(qrep, doc_rep)
            #input_dot = dy.sum_elems(input_vec)
            #input_len = dy.l2_norm(qrep - doc_rep)
            #input_vec = dy.concatenate([input_vec, input_dot, input_len])

            layer = utils.leaky_relu(self.b_term.expr() +
                                     self.W_term.expr() * input_vec)
            score = (self.b_term2.expr() + self.W_term2.expr() * layer)
            qscores.append(score)

        # Final scores and ultimate classifier.
        qterm_score = dy.dot_product(dy.concatenate(qscores), qgates)

        fin_score = (
            self.b_final.expr() +
            self.W_final.expr() * dy.concatenate([qterm_score, extra]))
        return fin_score
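
Every snippet on this page feeds a small constant expression built with dy.constant into a larger computation graph. As a point of reference, here is a minimal, self-contained sketch of just the IDF gating step above, using made-up dimensions and random stand-in vectors rather than the project's actual model:

# Minimal sketch of the IDF-weighted gate (hypothetical sizes, stand-in vectors).
import dynet as dy
import numpy as np

model = dy.ParameterCollection()
W_gate = model.add_parameters((1, 51))  # 50-dim term vector + 1 IDF feature

dy.renew_cg()
qreps = [dy.inputTensor(np.random.randn(50)) for _ in range(3)]  # stand-ins for query-term vectors
idfs = [2.3, 0.7, 1.5]                                           # stand-ins for idf_val(qw)

# Append each term's IDF as a 1-d constant expression before the gate projection.
gate_logits = [W_gate.expr() * dy.concatenate([qv, dy.constant(1, idf)])
               for qv, idf in zip(qreps, idfs)]
qgates = dy.softmax(dy.concatenate(gate_logits))
print(qgates.npvalue())  # gate weights over the three query terms
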
Code example #2
    def cal_scores(self, src_encodings, masks, train):

        src_len = len(src_encodings)
        batch_size = src_encodings[0].dim()[1]
        heads_LRlayer = []
        mods_LRlayer = []
        for encoding in src_encodings:
            heads_LRlayer.append(
                self.leaky_ReLu(self.b_head.expr() +
                                self.W_head.expr() * encoding))
            mods_LRlayer.append(
                self.leaky_ReLu(self.b_mod.expr() +
                                self.W_mod.expr() * encoding))

        heads_labels = []
        heads = []
        labels = []
        neg_inf = dy.constant(1, -float("inf"))
        # exclude root @ index=0 since roots do not have heads
        for row in range(1, src_len):

            scores_idx = []
            for col in range(src_len):

                dist = col - row
                mdist = self.dist_max
                dist_i = (min(dist, mdist - 1) + mdist if dist >= 0
                          else int(min(-1.0 * dist, mdist - 1)))
                dist_vec = dy.lookup_batch(self.dlookup, [dist_i] * batch_size)
                if train:
                    input_vec = dy.concatenate([
                        dy.esum([
                            dy.dropout(heads_LRlayer[col], self.dropout),
                            dy.dropout(mods_LRlayer[row], self.dropout)
                        ]), dist_vec
                    ])
                else:
                    input_vec = dy.concatenate([
                        dy.esum([heads_LRlayer[col], mods_LRlayer[row]]),
                        dist_vec
                    ])
                score = self.scoreHeadModLabel(input_vec, train)
                mask = masks[row] and masks[col]
                join_scores = []
                for bdx in range(batch_size):
                    if (mask[bdx] == 1):
                        join_scores.append(dy.pick_batch_elem(score, bdx))
                    else:
                        join_scores.append(
                            dy.concatenate([neg_inf] * self.n_labels))
                scores_idx.append(dy.concatenate_to_batch(join_scores))
            heads_labels.append(dy.concatenate(scores_idx))

        return heads_labels
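
The masking step in the middle of cal_scores (swap padded batch elements for a vector of -inf scores before re-batching) can be isolated as below; the sizes and the mask are made up for illustration:

# Sketch: mask padded batch elements with a -inf constant before concatenate_to_batch.
import dynet as dy
import numpy as np

dy.renew_cg()
n_labels, batch_size = 4, 3
neg_inf = dy.constant(1, -float("inf"))

scores = dy.inputTensor(np.random.randn(n_labels, batch_size), batched=True)  # batched label scores
mask = [1, 0, 1]  # 1 = real token, 0 = padding

join_scores = []
for bdx in range(batch_size):
    if mask[bdx] == 1:
        join_scores.append(dy.pick_batch_elem(scores, bdx))
    else:
        join_scores.append(dy.concatenate([neg_inf] * n_labels))
masked = dy.concatenate_to_batch(join_scores)
print(masked.npvalue().shape)  # (4, 3)
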
Code example #3
def calc_reinforce_loss(words, tags, delta):
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    word_reps = LSTM.transduce([LOOKUP[x] for x in words])

    # Softmax scores
    W = dy.parameter(W_sm)
    b = dy.parameter(b_sm)

    #calculate the probability distribution
    scores = [dy.affine_transform([b, W, x]) for x in word_reps]
    losses = [
        dy.pickneglogsoftmax(score, tag) for score, tag in zip(scores, tags)
    ]
    probs = [dy.softmax(score).npvalue() for score in scores]

    #then take samples from the probability distribution
    samples = [np.random.choice(range(len(x)), p=x) for x in probs]

    #calculate accuracy=reward
    correct = [sample == tag for sample, tag in zip(samples, tags)]
    r_i = float(sum(correct)) / len(correct)
    r = dy.constant((1), r_i)
    # Reward baseline for each word
    W_bl = dy.parameter(W_bl_p)
    b_bl = dy.parameter(b_bl_p)
    r_b = [
        dy.affine_transform([b_bl, W_bl, dy.nobackprop(x)]) for x in word_reps
    ]

    #we need to take the value in order to break the computation graph,
    #as the reward portion is trained separately and not backpropagated through when computing the overall score
    rewards_over_baseline = [(r - dy.nobackprop(x)) for x in r_b]
    #the scores for training the baseline
    baseline_scores = [dy.square(r - x) for x in r_b]

    #then calculate the reinforce scores using reinforce
    reinforce_scores = [
        r_s * score for r_s, score in zip(rewards_over_baseline, scores)
    ]

    #we want the first len(sent)-delta scores from xent then delta scores from reinforce
    #for mixer
    if len(scores) > delta:
        mixer_scores = scores[:len(scores) - delta] + reinforce_scores[delta - 1:]
    else:
        mixer_scores = reinforce_scores
    return dy.esum(mixer_scores), dy.esum(baseline_scores)
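
The comments about "breaking the computation graph" refer to dy.nobackprop: the reward baseline is fit with its own squared-error loss and must not receive gradients through the REINFORCE term. A toy sketch of just that piece, with illustrative names and sizes:

# Sketch: stop-gradient with dy.nobackprop so the baseline is trained only by its own loss.
import dynet as dy
import numpy as np

model = dy.ParameterCollection()
W_bl = model.add_parameters((1, 8))
b_bl = model.add_parameters((1,))

dy.renew_cg()
h = dy.inputTensor(np.random.randn(8))  # stand-in for one word representation
r = dy.constant(1, 0.5)                 # scalar reward as a constant expression

baseline = dy.affine_transform([b_bl.expr(), W_bl.expr(), dy.nobackprop(h)])
advantage = r - dy.nobackprop(baseline)  # no gradient reaches the baseline from here
baseline_loss = dy.square(r - baseline)  # the baseline's own regression loss
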
Code example #4
def do_one_sequence(rnn, params, sequence):
    # setup the sequence
    dy.renew_cg()
    s0 = rnn.initial_state()

    R = params["R"]
    bias = params["bias"]
    lookup = params["lookup"]
    input_sequence = [input_token2int[t] for (t, _) in sequence]
    output_sequence = [output_token2int[t] for (_, t) in sequence]
    s = s0
    loss = []
    for input_token, output_token in zip(input_sequence, output_sequence):
        s = s.add_input(lookup[input_token])
        probs = dy.softmax(R * s.output() + bias)

        # MinMax to avoid undefined 0/1 probabilities
        # 1e-15 is arbitrary
        min_val = dy.constant((OUTPUT_VOCAB_SIZE, ), 1e-15)
        max_val = dy.constant((OUTPUT_VOCAB_SIZE, ), 1 - 1e-15)
        probs = dy.bmax(dy.bmin(probs, max_val), min_val)
        loss.append(-dy.log(dy.pick(probs, output_token)))
    loss = dy.esum(loss)
    return loss
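
The bmin/bmax clamp above is a generic way to keep dy.log finite when a softmax output underflows to 0 or saturates at 1; a standalone sketch with an assumed vocabulary size of 5:

# Sketch: clamp a probability vector into (1e-15, 1 - 1e-15) before taking its log.
import dynet as dy

OUTPUT_VOCAB_SIZE = 5  # assumed size for illustration
dy.renew_cg()
probs = dy.softmax(dy.inputVector([2.0, -1.0, 0.5, 0.0, -3.0]))

min_val = dy.constant((OUTPUT_VOCAB_SIZE, ), 1e-15)
max_val = dy.constant((OUTPUT_VOCAB_SIZE, ), 1 - 1e-15)
probs = dy.bmax(dy.bmin(probs, max_val), min_val)  # element-wise clamp

nll = -dy.log(dy.pick(probs, 2))  # negative log-probability of token id 2
print(nll.value())
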
Code example #5
    def learn(self, batch_size):
        if self.prioritized:
            if not self.memory.is_full(): return -np.inf
            indices, exps, weights = self.memory.sample(batch_size, self.beta)
        else:
            exps = self.memory.sample(batch_size)
        obss, actions, rewards, obs_nexts, dones = self._process(exps)

        dy.renew_cg()
        target_network = self.target_network if self.use_double_dqn else self.network
        if self.dueling:
            target_values, v = target_network(obs_nexts, batched=True)
            target_values = target_values.npvalue() + v.npvalue()
        else:
            target_values = target_network(obs_nexts, batched=True)
            target_values = target_values.npvalue()
        target_values = np.max(target_values, axis=0)
        target_values = rewards + self.reward_decay * (target_values * (1 - dones))

        dy.renew_cg()
        if self.dueling:
            all_values_expr, v = self.network(obss, batched=True)
        else:
            all_values_expr = self.network(obss, batched=True)
        picked_values = dy.pick_batch(all_values_expr, actions)
        diff = (picked_values + v if self.dueling else picked_values) - dy.inputTensor(target_values, batched=True)
        if self.prioritized:
            self.memory.update(indices, np.transpose(np.abs(diff.npvalue())))
        losses = dy.pow(diff, dy.constant(1, 2))
        if self.prioritized:
            losses = dy.cmult(losses, dy.inputTensor(weights, batched=True))
        loss = dy.sum_batches(losses)
        loss_value = loss.npvalue()
        loss.backward()
        self.trainer.update()

        self.epsilon = max(self.epsilon - self.epsilon_decrease,
                           self.epsilon_lower)
        if self.prioritized:
            self.beta = min(self.beta + self.beta_increase, 1.)

        self.learn_step += 1
        if self.use_double_dqn and self.learn_step % self.n_replace_target == 0:
            self.target_network.update(self.network)
        return loss_value
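
The loss at the end of learn squares a batched TD error by raising it to a constant exponent and, in the prioritized branch, reweights it per batch element. A small numeric sketch of that pattern (the values are arbitrary):

# Sketch: batched squared TD error via dy.pow, with optional per-element weights.
import dynet as dy
import numpy as np

dy.renew_cg()
picked_values = dy.inputTensor(np.array([[1.0, 2.0, 0.5]]), batched=True)  # dim (1,), batch size 3
targets = dy.inputTensor(np.array([[0.8, 2.5, 0.0]]), batched=True)

diff = picked_values - targets
losses = dy.pow(diff, dy.constant(1, 2))          # element-wise square, one loss per batch element
weights = dy.inputTensor(np.array([[1.0, 0.5, 2.0]]), batched=True)
loss = dy.sum_batches(dy.cmult(losses, weights))  # importance-weighted sum, as in the prioritized case
print(loss.value())
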
Code example #6
File: dqn.py Project: danielhers/cnn
    def learn(self, batch_size):
        if self.prioritized:
            if not self.memory.is_full(): return -np.inf
            indices, exps, weights = self.memory.sample(batch_size, self.beta)
        else:
            exps = self.memory.sample(batch_size)
        obss, actions, rewards, obs_nexts, dones = self._process(exps)

        dy.renew_cg()
        target_network = self.target_network if self.use_double_dqn else self.network
        if self.dueling:
            target_values, v = target_network(obs_nexts, batched=True)
            target_values = target_values.npvalue() + v.npvalue()
        else:
            target_values = target_network(obs_nexts, batched=True)
            target_values = target_values.npvalue()
        target_values = np.max(target_values, axis=0)
        target_values = rewards + self.reward_decay * (target_values * (1 - dones))

        dy.renew_cg()
        if self.dueling:
            all_values_expr, v = self.network(obss, batched=True)
        else:
            all_values_expr = self.network(obss, batched=True)
        picked_values = dy.pick_batch(all_values_expr, actions)
        diff = (picked_values + v if self.dueling else picked_values) - dy.inputTensor(target_values, batched=True)
        if self.prioritized:
            self.memory.update(indices, np.transpose(np.abs(diff.npvalue())))
        losses = dy.pow(diff, dy.constant(1, 2))
        if self.prioritized:
            losses = dy.cmult(losses, dy.inputTensor(weights, batched=True))
        loss = dy.sum_batches(losses)
        loss_value = loss.npvalue()
        loss.backward()
        self.trainer.update()

        self.epsilon = max(self.epsilon - self.epsilon_decrease, self.epsilon_lower)
        if self.prioritized:
            self.beta = min(self.beta + self.beta_increase, 1.)

        self.learn_step += 1
        if self.use_double_dqn and self.learn_step % self.n_replace_target == 0:
            self.target_network.update(self.network)
        return loss_value
Code example #7
File: bilstm-tagger.py Project: aiedward/nn4nlp-code
def calc_reinforce_loss(words, tags, delta):
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    word_reps = LSTM.transduce([LOOKUP[x] for x in words])

    # Softmax scores
    W = dy.parameter(W_sm)
    b = dy.parameter(b_sm)

    #calculate the probability distribution 
    scores = [dy.affine_transform([b, W, x]) for x in word_reps]
    losses = [dy.pickneglogsoftmax(score, tag) for score, tag in zip(scores, tags)]
    probs = [dy.softmax(score).npvalue() for score in scores]

    #then take samples from the probability distribution
    samples = [np.random.choice(range(len(x)), p=x) for x in probs]

    #calculate accuracy=reward
    correct = [sample == tag for sample, tag in zip(samples, tags)]
    r_i = float(sum(correct))/len(correct)
    r = dy.constant((1), r_i)
    # Reward baseline for each word
    W_bl = dy.parameter(W_bl_p)
    b_bl = dy.parameter(b_bl_p)
    r_b = [dy.affine_transform([b_bl, W_bl, dy.nobackprop(x)]) for x in word_reps]

    #we need to take the value in order to break the computation graph,
    #as the reward portion is trained separately and not backpropagated through when computing the overall score
    rewards_over_baseline = [(r - dy.nobackprop(x)) for x in r_b]
    #the scores for training the baseline
    baseline_scores = [dy.square(r - x) for x in r_b]

    #then calculate the reinforce scores using reinforce
    reinforce_scores = [r_s*score for r_s, score in zip(rewards_over_baseline, scores)]

    #we want the first len(sent)-delta scores from xent then delta scores from reinforce
    #for mixer
    if len(scores) > delta:
        mixer_scores = scores[:len(scores)-delta] + reinforce_scores[delta-1:]
    else:
        mixer_scores = reinforce_scores
    return dy.esum(mixer_scores), dy.esum(baseline_scores)
Code example #8
  def GetQDScore(self, qwds, qw2v, qvecs, dwds, dw2v, dvecs, extra,
                 train=False):
    nq = len(qvecs)
    nd = len(dvecs)
    qgl = [self.W_gate.expr() *
           dy.concatenate([qv, dy.constant(1, self.idf_val(qw))])
           for qv, qw in zip(qvecs, qwds)]
    qgates = dy.softmax(dy.concatenate(qgl))

    sims = []
    for qv in qvecs:
      dsims = []
      for dv in dvecs:
        dsims.append(self.Cosine(qv, dv))
      sims.append(dsims)

    w2v_sims = []
    for qv in qw2v:
      dsims = []
      for dv in dw2v:
        dsims.append(self.Cosine(qv, dv))
      w2v_sims.append(dsims)

    matches = []
    for qw in qwds:
      dmatch = []
      for dw in dwds:
        dmatch.append(dy.ones(1) if qw == dw else dy.zeros(1))
      matches.append(dmatch)

    qscores = self.GetPOSIT(qvecs, sims, w2v_sims, matches)

    # Final scores and ultimate classifier.
    qterm_score = dy.dot_product(dy.concatenate(qscores), qgates)

    fin_score = (self.W_final.expr() * dy.concatenate([qterm_score,
                                                       extra]))
    return fin_score
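
The exact-match feature in this variant is just a 1-d one/zero expression per query-document word pair; a tiny sketch with toy tokens:

# Sketch: binary exact-match features built from dy.ones / dy.zeros (toy tokens).
import dynet as dy

dy.renew_cg()
qwds = ["protein", "binding"]
dwds = ["binding", "site", "protein"]

matches = [[dy.ones(1) if qw == dw else dy.zeros(1) for dw in dwds] for qw in qwds]
match_vecs = [dy.concatenate(row) for row in matches]  # one match vector per query term
print([v.npvalue().tolist() for v in match_vecs])  # [[0.0, 0.0, 1.0], [1.0, 0.0, 0.0]]
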