Example #1
def reinforce():
    if tf:
        global tf_step
    for src, tgt in tqdm(training_data,
                         mininterval=1,
                         desc="Reinforce-train",
                         leave=False):
        rl_optimizer.zero_grad()

        # Greedy (argmax) decode used as the self-critical baseline.
        max_words = model.sample(src)
        # Stochastic sample together with its token probabilities.
        s_words, props = model.sample(src, False)

        # ROUGE-L of the sampled sequence is the reward; ROUGE-L of the greedy
        # sequence is the baseline.
        reward = rouge_l(s_words, tgt)
        baseline = rouge_l(max_words, tgt)

        # Advantage of the sample over the model's own greedy output.
        advantage = reward - baseline

        loss = rl_criterion(props, s_words, tgt, advantage)

        loss.backward()
        rl_optimizer.step()
        if tf is not None:
            add_summary_value("reinforce loss", loss.data[0])
            add_summary_value("reinforce advantage", advantage.mean().data)
            add_summary_value("reinforce baseline", baseline.mean().data)
            add_summary_value("reinforce reward", reward.mean().data)
            tf_step += 1

            if tf_step % 100 == 0:
                tf_summary_writer.flush()
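The helpers used above (model.sample, rouge_l, rl_criterion, add_summary_value) are defined elsewhere in the project and are not reproduced here. As a rough, hypothetical sketch of the self-critical REINFORCE objective this loop appears to implement, rl_criterion could look like the following; the function name, signature, padding index, and tensor shapes are assumptions for illustration, not the project's actual code.

# Hypothetical sketch of rl_criterion: advantage-weighted negative log-likelihood
# of the sampled tokens (assumption; the project's implementation may differ).
import torch

def rl_criterion_sketch(log_probs, sampled_words, advantage, pad_idx=0):
    # log_probs: (batch, seq_len, vocab) log-probabilities of the sampled sequence
    # sampled_words: (batch, seq_len) token ids drawn from the policy
    # advantage: (batch,) reward minus baseline, treated as a constant
    token_logp = log_probs.gather(2, sampled_words.unsqueeze(2)).squeeze(2)
    mask = (sampled_words != pad_idx).float()
    # REINFORCE: maximize the advantage-weighted log-likelihood of the samples,
    # i.e. minimize its negative; no gradient flows through the advantage.
    loss = -(advantage.detach().unsqueeze(1) * token_logp * mask).sum() / mask.sum()
    return loss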
Example #2
def train_actor_critic():
    loss_A = loss_C = .0
    actor.train()
    critic.train()
    global tf_step

    for imgs, labels in tqdm(training_data,
                             mininterval=1,
                             desc="Actor-Critic Training",
                             leave=False):
        optim_A.zero_grad()
        optim_C.zero_grad()

        enc = actor.encode(imgs)
        hidden_A = actor.feed_enc(enc)
        props_A, words_A = actor(hidden_A, labels)
        fixed_props_A = fix_variable(props_A)

        hidden_C = critic.feed_enc(enc)
        props_C, words_C = critic(words_A, hidden_C)

        scores_A, scores_C = rouge_l(words_A[:, 1:],
                                     labels), rouge_l(words_C, labels)

        fix_mask_rewards_A = mask_score(fixed_props_A, words_A[:, 1:],
                                        scores_A)
        mask_rewards_C = mask_score(props_C, words_C, scores_C)

        loss_c = critic.td_error(fix_mask_rewards_A, mask_rewards_C,
                                 criterion_C)
        loss_c.backward()
        loss_C += loss_c.data

        optim_C.clip_grad_norm()
        optim_C.step()

        _, sample_words, sample_props = actor.speak(hidden_A)
        loss_a, reward = criterion_AC(sample_props, sample_words,
                                      scores_C - scores_A)
        loss_a.backward()
        loss_A += loss_a.data

        optim_A.clip_grad_norm()
        optim_A.step()

        if tf is not None:
            add_summary_value("train critic loss", loss_c.data[0])
            add_summary_value("train actor loss", loss_a.data[0])
            add_summary_value("train actor reward", reward.data[0])
            tf_step += 1

            if tf_step % 100 == 0:
                tf_summary_writer.flush()

    loss_A = loss_A[0] / training_data.sents_size
    loss_C = loss_C[0] / training_data.sents_size

    return loss_A, loss_C
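optim_A.clip_grad_norm() and optim_C.clip_grad_norm() suggest that the optimizers are wrapped so gradients can be clipped just before each step. A minimal sketch of such a wrapper is shown below, assuming it simply delegates to PyTorch's gradient-norm clipping utility; the class name and default threshold are illustrative, not the project's code.

# Illustrative optimizer wrapper with a clip_grad_norm() method (assumed design).
import torch

class ClippedOptimizer(object):
    def __init__(self, optimizer, max_norm=5.0):
        self.optimizer = optimizer
        self.max_norm = max_norm
        # All parameters managed by the wrapped optimizer.
        self.params = [p for group in optimizer.param_groups for p in group["params"]]

    def zero_grad(self):
        self.optimizer.zero_grad()

    def clip_grad_norm(self):
        # Rescale gradients in place so their global norm does not exceed max_norm
        # (older PyTorch versions spell this torch.nn.utils.clip_grad_norm).
        torch.nn.utils.clip_grad_norm_(self.params, self.max_norm)

    def step(self):
        self.optimizer.step()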
Example #3
def train_actor_critic():
    loss_A = loss_C = .0
    actor.train()
    critic.train()

    for imgs, labels in tqdm(training_data,
                             mininterval=1,
                             desc="Actor-Critic Training",
                             leave=False):
        optim_A.zero_grad()
        optim_C.zero_grad()

        enc = actor.encode(imgs)
        hidden_A = actor.feed_enc(enc)
        props_A, words_A = actor(hidden_A, labels)
        fixed_props_A = fix_variable(props_A)

        hidden_C = critic.feed_enc(enc)
        props_C, words_C = critic(words_A, hidden_C)

        scores_A, scores_C = rouge_l(words_A[:, 1:],
                                     labels), rouge_l(words_C, labels)

        fix_mask_rewards_A = mask_score(fixed_props_A, words_A[:, 1:],
                                        scores_A)
        mask_rewards_C = mask_score(props_C, words_C, scores_C)

        loss_c = critic.td_error(fix_mask_rewards_A, mask_rewards_C,
                                 criterion_C)
        loss_c.backward()
        loss_C += loss_c.data

        optim_C.clip_grad_norm()
        optim_C.step()

        _g = mask_score(props_A, words_A[:, 1:], scores_A - scores_C)
        loss_a = criterion_A(_g, labels.view(-1))
        loss_a.backward()
        loss_A += loss_a.data

        optim_A.clip_grad_norm()
        optim_A.step()

    loss_A = loss_A[0] / training_data.sents_size
    loss_C = loss_C[0] / training_data.sents_size

    return loss_A, loss_C
Example #4
def train_actor_critic():
    loss_A = loss_C = .0
    actor.train()
    critic.train()

    # for imgs, labels in tqdm(training_data,
    #         mininterval=1, desc="Actor-Critic Training", leave=False):
    for imgs, labels in training_data:
        optim_A.zero_grad()
        optim_C.zero_grad()

        enc = encode(imgs)[0]

        hidden_A = actor.feed_enc(enc)
        props_A, words_A = actor(hidden_A)

        fixed_props_A = Variable(props_A.data.new(*props_A.size()),
                                 requires_grad=False)
        fixed_props_A.data.copy_(props_A.data)

        hidden_C = critic.feed_enc(enc)
        props_C, words_C = critic(words_A, hidden_C)

        scores_A, scores_C = rouge_l(words_A[:, 1:],
                                     labels), rouge_l(words_C, labels)

        loss_c = critic.td_error(scores_A, scores_C, fixed_props_A, props_C,
                                 criterion_C)
        loss_c.backward()
        optim_C.step()
        loss_C += loss_c.data

        base = (scores_A - scores_C).mean()
        loss_a = criterion_A(props_A.view(-1, props_A.size(2)),
                             labels.view(-1)) * base
        loss_a.backward()
        optim_A.step()
        loss_A += loss_a.data

    loss_A = loss_A[0] / training_data.sents_size
    loss_C = loss_C[0] / training_data.sents_size

    return loss_A, loss_C
Example #5
def pre_train_critic():
    iterations, total_loss = 0, .0
    actor.eval()
    critic.train()
    global tf_step
    for imgs, labels in tqdm(training_data,
                             mininterval=1,
                             desc="Pre-train Critic",
                             leave=False):
        optim_pre_C.zero_grad()

        enc = actor.encode(imgs)
        hidden_A = actor.feed_enc(enc)
        props_A, words_A = actor(hidden_A, labels)

        fixed_props_A = fix_variable(props_A)

        hidden_C = critic.feed_enc(enc)
        props_C, words_C = critic(words_A, hidden_C)

        scores_A, scores_C = rouge_l(words_A[:, 1:],
                                     labels), rouge_l(words_C, labels)
        mask_rewards_A = mask_score(fixed_props_A, words_A[:, 1:], scores_A)
        mask_rewards_C = mask_score(props_C, words_C, scores_C)

        loss = critic.td_error(mask_rewards_A, mask_rewards_C, criterion_C)
        loss.backward()
        total_loss += loss.data

        optim_pre_C.clip_grad_norm()
        optim_pre_C.step()

        iterations += 1
        if tf is not None:
            add_summary_value("pre-train critic loss", loss.data[0])
            tf_step += 1

            if tf_step % 100 == 0:
                tf_summary_writer.flush()

        if iterations == args.iterations: break

    return total_loss[0] / args.iterations
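mask_score and critic.td_error are project helpers that are not shown in this snippet. One plausible reading, assumed purely for illustration, is that mask_score spreads each sequence's ROUGE-L score over its non-padding positions so the critic can be trained against per-token targets; the real helper also receives the decoder probabilities, which this sketch ignores.

# Hypothetical sketch of mask_score: broadcast a per-sequence score over the
# sequence's non-padding tokens (assumption, not the project's actual code).
def mask_score_sketch(words, scores, pad_idx=0):
    # words: (batch, seq_len) token ids; scores: (batch,) sequence-level rewards
    mask = (words != pad_idx).float()
    # Every valid position receives its sequence's score; padding positions get zero.
    return scores.unsqueeze(1) * mask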
Example #6
def pre_train_critic():
    iterations, total_loss = 0, .0
    actor.eval()
    critic.train()
    # for imgs, labels in tqdm(training_data,
    #         mininterval=1, desc="Pre-train Critic", leave=False):
    for imgs, labels in training_data:
        optim_pre_C.zero_grad()

        enc = encode(imgs)[0]

        hidden_A = actor.feed_enc(enc)
        props_A, words_A = actor(hidden_A)

        fixed_props_A = Variable(props_A.data.new(*props_A.size()),
                                 requires_grad=False)
        fixed_props_A.data.copy_(props_A.data)

        hidden_C = critic.feed_enc(enc)
        props_C, words_C = critic(words_A, hidden_C)

        scores_A, scores_C = rouge_l(words_A[:, 1:],
                                     labels), rouge_l(words_C, labels)

        loss = critic.td_error(scores_A, scores_C, fixed_props_A, props_C,
                               criterion_C)
        loss.backward()

        optim_pre_C.step()
        total_loss += loss.data

        iterations += 1

        if iterations == args.iterations: break

    return total_loss[0] / args.iterations
Example #7
def rouge_score(session):
    assert nb_batch*conf.batch_size%conf.batch_size==0
    pred_sum=[]
    for m in range(0, nb_batch*conf.batch_size, conf.batch_size):
        pred = session.run(decoder_prediction,
                 feed_dict={encoder_inputs        : test_doc2id[m:m+conf.batch_size],
                            query_inputs          : test_query2id[m:m+conf.batch_size],
                            decoder_targets       : test_summ2id[m:m+conf.batch_size],
                            encoder_inputs_length : test_doc_len[m:m+conf.batch_size],
                            query_inputs_length   : test_que_len[m:m+conf.batch_size],
                            decoder_targets_length: test_sum_len[m:m+conf.batch_size],
                            sum_mask_tf           : test_sum_mask[m:m+conf.batch_size],
                            doc_mask_tf           : test_doc_mask[m:m+conf.batch_size],
                            que_mask_tf           : test_query_mask[m:m+conf.batch_size],
                            #embedding_placeholder : embedding_weights,
                            is_training           : False,
                           })
    
        pred_sum.extend(pred.tolist())
    
    assert len(pred_sum)==nb_batch*conf.batch_size
    rouge1_sum=[]
    rouge2_sum=[]
    rougel_sum=[]
    for i in range(nb_batch*conf.batch_size):
        pred_temp=[]
        ref_temp=[]
        for id_ in pred_sum[i]:
            if id_==1: break
            pred_temp.append(str(id_))
        
        for id_ in test_summ2id[i]:
            if id_==1: break
            ref_temp.append(str(id_))
        
        if pred_temp==[] or ref_temp==[]:
            continue
        
        rouge1_sum.append(rouge.rouge_n(pred_temp, ref_temp, n=1)[-1])
        rouge2_sum.append(rouge.rouge_n(pred_temp, ref_temp, n=2)[-1])
        rougel_sum.append(rouge.rouge_l(pred_temp, ref_temp))
        
     
    #print "rouge_1:,rouge1_sum/float(split))
    #print "rouge_2:%f"%(rouge2_sum/float(split))
    #print "rouge_l:%f"%(rougel_sum/float(split))
    return np.mean(rouge1_sum), np.mean(rouge2_sum), np.mean(rougel_sum), \
           np.std(rouge1_sum) , np.std(rouge2_sum), np.std(rougel_sum), pred_sum
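For reference, sentence-level ROUGE-L is the F-measure of the longest common subsequence (LCS) between a predicted and a reference token sequence. The rouge module used by this example is not shown; the snippet below is an independent, self-contained sketch of the metric for illustration and is not that module's API.

# Self-contained sketch of sentence-level ROUGE-L (LCS-based F-measure).
def rouge_l_score(pred_tokens, ref_tokens, beta=1.2):
    # Longest common subsequence length via dynamic programming.
    m, n = len(pred_tokens), len(ref_tokens)
    dp = [[0] * (n + 1) for _ in range(m + 1)]
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            if pred_tokens[i - 1] == ref_tokens[j - 1]:
                dp[i][j] = dp[i - 1][j - 1] + 1
            else:
                dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])
    lcs = dp[m][n]
    if lcs == 0:
        return 0.0
    precision = lcs / float(m)
    recall = lcs / float(n)
    # Standard ROUGE-L F-score; beta > 1 weights recall more heavily than precision.
    return ((1 + beta ** 2) * precision * recall) / (recall + beta ** 2 * precision)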
Example #8
File: train.py Project: zxsted/torch_light
def train_actor_critic():
    actor.train()
    critic.train()
    if tf:
        global tf_step

    for imgs, labels in tqdm(training_data,
                             mininterval=1,
                             desc="Actor-Critic Training",
                             leave=False):
        optim_A.zero_grad()
        optim_C.zero_grad()

        enc = actor.encode(imgs)
        hidden_A = actor.feed_enc(enc)
        target, words = actor(hidden_A)
        policy_values = rouge_l(words, labels)

        hidden_C = critic.feed_enc(enc)
        estimated_values = critic(words, hidden_C)

        loss_c = criterion_C(estimated_values, policy_values)
        loss_c.backward()
        optim_C.clip_grad_norm()
        optim_C.step()

        # Mean advantage: the policy's ROUGE-L reward minus the critic's estimate,
        # passed to the actor's optimizer step below.
        reward = torch.mean(policy_values - estimated_values)

        loss_a = criterion_A(target.view(-1, target.size(2)), labels.view(-1))
        loss_a.backward()
        optim_A.clip_grad_norm()
        optim_A.step(reward)

        if tf is not None:
            add_summary_value("train critic loss", loss_c[0])
            add_summary_value("train actor loss", loss_a.data[0])
            add_summary_value("train actor reward", reward.data)
            add_summary_value("train critic score",
                              estimated_values.data.mean())
            add_summary_value("train actor score", policy_values.data.mean())
            tf_step += 1

            if tf_step % 100 == 0:
                tf_summary_writer.flush()
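criterion_C above is not defined in this excerpt. A plausible minimal choice, assuming the critic emits one value estimate per sequence, is a mean-squared-error regression of those estimates onto the observed ROUGE-L rewards; the helper below is an illustrative assumption, not the project's code.

# Possible form of criterion_C: regress critic estimates onto observed rewards.
import torch.nn as nn

def make_critic_criterion():
    mse = nn.MSELoss()
    def criterion_C(estimated_values, policy_values):
        # The rewards are fixed targets, so block any gradient through them.
        return mse(estimated_values, policy_values.detach())
    return criterion_C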
Example #9
File: train.py Project: zxsted/torch_light
def eval():
    actor.eval()
    eval_score = .0
    for imgs, labels in tqdm(validation_data,
                             mininterval=1,
                             desc="Actor-Critic Eval",
                             leave=False):
        enc = actor.encode(imgs)

        hidden = actor.feed_enc(enc)
        words, _ = actor.speak(hidden)

        scores = rouge_l(words, labels)
        scores = scores.sum()

        eval_score += scores.data

    eval_score = eval_score[0] / validation_data.sents_size

    return eval_score
Example #10
def eval():
    actor.eval()
    eval_loss = eval_score = .0
    # for imgs, labels in tqdm(validation_data,
    #         mininterval=1, desc="Actor-Critic Training", leave=False):
    for imgs, labels in validation_data:
        enc = encode(imgs)[0]

        hidden = actor.feed_enc(enc)
        props, words = actor(hidden)

        loss = criterion_A(props.view(-1, props.size(2)), labels.view(-1))
        scores = rouge_l(words[:, 1:], labels)
        scores = scores.sum()

        eval_loss += loss.data
        eval_score += scores

    eval_loss = eval_loss[0] / validation_data.sents_size
    eval_score = eval_score[0] / validation_data.sents_size

    return eval_loss, eval_score
Example #11
File: train.py Project: zxsted/torch_light
def pre_train_critic():
    iterations = 0
    actor.eval()
    critic.train()
    if tf:
        global tf_step
    for imgs, labels in tqdm(training_data,
                             mininterval=1,
                             desc="Pre-train Critic",
                             leave=False):
        optim_pre_C.zero_grad()

        enc = actor.encode(imgs)
        hidden_A = actor.feed_enc(enc)
        # we pre-train the critic network by feeding it with sampled actions from the fixed pre-trained actor.
        _, words = actor(hidden_A)
        policy_values = rouge_l(words, labels)

        hidden_C = critic.feed_enc(enc)
        estimated_values = critic(words, hidden_C)
        loss = criterion_C(estimated_values, policy_values)

        loss.backward()
        optim_pre_C.clip_grad_norm()
        optim_pre_C.step()

        iterations += 1
        if tf is not None:
            add_summary_value("pre-train critic loss", loss.data[0])
            tf_step += 1

            if tf_step % 100 == 0:
                tf_summary_writer.flush()

        if iterations == args.iterations:
            break