예제 #1
0
def _scalar_count(count):
  """Convert an accumulated label count into a plain scalar.

  ``count`` is either the initial Python number (when no batch contributed
  to it) or a one-element tensor/Variable built up with ``+=``.  The array
  is flattened before indexing so that both a one-element 1-D result and a
  0-dim result can be read.
  """
  if isinstance(count, (int, float)):
    return count
  return count.data.cpu().numpy().reshape(-1)[0]


def _write_batch_results(result_files, sen, label, label_pred,
                         index2word, index2label, sen_beam_size_seq):
  """Append one batch of inputs, gold labels and predictions to the files.

  ``sen``, ``label`` and ``label_pred`` are lists of lists; they are
  flattened so that every token produces one line in each per-token file.
  ``result_files`` maps 'sen', 'label', 'pred', 'result_processed' and
  'beam_size' to writable file objects.
  """
  flat_sen = list(itertools.chain.from_iterable(sen))
  flat_label = list(itertools.chain.from_iterable(label))
  flat_pred = list(itertools.chain.from_iterable(label_pred))
  assert len(flat_sen) == len(flat_label) and len(flat_label) == len(flat_pred)

  for word_idx, label_idx, pred_idx in zip(flat_sen, flat_label, flat_pred):
    result_files['sen'].write(str(word_idx) + '\n')
    result_files['label'].write(str(label_idx) + '\n')
    result_files['pred'].write(str(pred_idx) + '\n')
    result_files['result_processed'].write(
      "%s %s %s\n" % (index2word[word_idx],
                      index2label[label_idx],
                      index2label[pred_idx]))

  result_files['beam_size'].write(
    ' '.join(map(str, sen_beam_size_seq)) + '\n')

  # Flush once per batch rather than once per token.
  for handle in result_files.values():
    handle.flush()


def train_adaptive(rank,
                   machine,
                   max_beam_size,
                   lr,
                   shared_model,
                   counter,
                   lock,
                   optimizer,
                   eval_data_X, eval_data_Y, index2word, index2label,
                   suffix, result_path, decode_method, beam_size,
                   reward_coef_fscore, reward_coef_beam_size,
                   f_score_index_begin,
                   args,
                   ):
  """Run adaptive-beam-size training epochs for one worker process.

  For every epoch the batches are visited in random order, encoded with
  ``machine``, decoded with ``decode_one_sentence_adaptive_rl`` and scored.
  The average beam size and the F-score (in percent) are printed per epoch.
  When ``result_path`` is truthy, per-token results are also written to
  text files inside ``args.logdir``.

  Args:
    rank: Worker index; added to the random seed and used in messages and
      result file names.
    machine: Tagging model; this function uses its ``gpu``, ``hidden_dim``,
      ``label_size``, ``encode``, ``enc2dec_hidden`` and ``enc2dec_cell``
      members.
    max_beam_size: Passed to ``AdaptiveActorCritic`` and to the decoder.
    lr: Learning rate of the Adam optimizer created when ``optimizer`` is
      ``None``.
    shared_model: Model whose parameters are optimized; passed to the
      decoder.
    counter: Passed through to the decoder unchanged.
    lock: Passed through to the decoder unchanged.
    optimizer: Optimizer to use, or ``None`` to create a local Adam one.
    eval_data_X: Indexable collection of batches; each batch is a list of
      equal-length index lists (the length of the first one is taken as
      the sentence length).
    eval_data_Y: Label batches aligned with ``eval_data_X``.
    index2word: Maps a word index to its printable form.
    index2label: Maps a label index to its printable form.
    suffix: Appended to every result file name.
    result_path: Prefix of the result file names; falsy disables file
      output.
    decode_method: Only ``"adaptive"`` is supported.
    beam_size: Initial beam size given to the decoder.
    reward_coef_fscore: Passed through to the decoder.
    reward_coef_beam_size: Passed through to the decoder.
    f_score_index_begin: First label index that counts towards the
      F-score; also passed to the decoder.
    args: Namespace providing ``n_epochs`` and ``logdir``; also passed to
      the decoder.

  Raises:
    Exception: If ``decode_method`` is not ``"adaptive"``.
  """
  torch.manual_seed(123 + rank)

  # Create the adaptive model.
  model = AdaptiveActorCritic(max_beam_size=max_beam_size, action_space=3)
  if optimizer is None:
    optimizer = optim.Adam(shared_model.parameters(), lr=lr)
  model.train()

  batch_num = len(eval_data_X)
  # NOTE(review): this list is never cleared, so the "Avg beam size" printed
  # for an epoch covers all epochs so far -- confirm this is intended.
  beam_size_seqs = []

  for epoch in range(1, args.n_epochs + 1):
    print("Epoch: {} of training process {}".format(epoch, rank))

    true_pos_count = 0
    pred_pos_count = 0
    true_pred_pos_count = 0

    result_files = {}
    try:
      if result_path:
        # Build the name prefix only when files are requested, so that
        # result_path=None does not fail on string concatenation.
        desc = result_path + '_process_' + str(rank) + '_' + str(epoch) + '_'
        for key, stem in (('sen', "sen_"),
                          ('pred', "pred_"),
                          ('label', "label_"),
                          ('result_processed', "result_processed_"),
                          ('beam_size', 'beam_size_')):
          result_files[key] = open(
            os.path.join(args.logdir, desc + stem + suffix + ".txt"), 'w')

      # Visit the batches in shuffled order.
      for batch_idx in np.random.permutation(range(batch_num)):
        sen = eval_data_X[batch_idx]
        label = eval_data_Y[batch_idx]

        current_batch_size = len(sen)
        current_sen_len = len(sen[0])

        if current_sen_len < 3:  # ignore sentence having tiny length
          continue

        sen_var = Variable(torch.LongTensor(sen))
        label_var = Variable(torch.LongTensor(label))

        if machine.gpu:
          sen_var = sen_var.cuda()
          label_var = label_var.cuda()

        # Initialize the hidden and cell states.
        # The axes semantics are
        # (num_layers * num_directions, batch_size, hidden_size),
        # so the first axis is 1 for a single-directional LSTM encoder and
        # 2 for a bi-directional LSTM encoder.
        init_enc_hidden = Variable(
          torch.zeros((2, current_batch_size, machine.hidden_dim)))
        init_enc_cell = Variable(
          torch.zeros((2, current_batch_size, machine.hidden_dim)))

        if machine.gpu:
          init_enc_hidden = init_enc_hidden.cuda()
          init_enc_cell = init_enc_cell.cuda()

        enc_hidden_seq, (enc_hidden_out, enc_cell_out) = machine.encode(
          sen_var, init_enc_hidden, init_enc_cell)

        # enc_hidden_out has the semantics (num_layers * num_directions,
        # batch, hidden_size) and holds the hidden state for t = seq_len.
        # A linear layer maps the two directions onto a single hidden_dim
        # vector that is used as the initial state of the decoder.
        init_dec_hidden = machine.enc2dec_hidden(
          torch.cat([enc_hidden_out[0], enc_hidden_out[1]], dim=1))
        init_dec_cell = machine.enc2dec_cell(
          torch.cat([enc_cell_out[0], enc_cell_out[1]], dim=1))

        if decode_method == "adaptive":
          # The argument "beam_size" serves as the initial beam size here.
          label_pred_seq, accum_logP_pred_seq, logP_pred_seq, \
          attention_pred_seq, episode, sen_beam_size_seq = \
            decode_one_sentence_adaptive_rl(
              machine, current_sen_len, init_dec_hidden, init_dec_cell,
              enc_hidden_seq, beam_size, max_beam_size, model, shared_model,
              reward_coef_fscore, reward_coef_beam_size, label_var,
              f_score_index_begin, counter, lock, optimizer, args)
        else:
          raise Exception("Not implemented!")

        # Record the beam sizes used for this batch.
        beam_size_seqs += sen_beam_size_seq

        for label_index in range(f_score_index_begin, machine.label_size):
          true_pos = (label_var == label_index)
          true_pos_count += true_pos.float().sum()

          pred_pos = (label_pred_seq == label_index)
          pred_pos_count += pred_pos.float().sum()

          true_pred_pos = true_pos & pred_pos
          true_pred_pos_count += true_pred_pos.float().sum()

        # Write the results of this batch to the files.
        if result_path:
          if machine.gpu:
            label_pred_seq = label_pred_seq.cpu()
          # The decoder output is indexed as (batch size, sentence length).
          pred_rows = label_pred_seq.data.numpy().tolist()
          # decode_method is necessarily "adaptive" at this point, so the
          # beam size sequence is always written.
          _write_batch_results(result_files, sen, label, pred_rows,
                               index2word, index2label, sen_beam_size_seq)
    finally:
      # Close whatever was opened, even if the epoch failed part-way.
      for handle in result_files.values():
        handle.close()

    # The counts are still plain zeros when every batch was skipped.
    true_pos_count = _scalar_count(true_pos_count)
    pred_pos_count = _scalar_count(pred_pos_count)
    true_pred_pos_count = _scalar_count(true_pred_pos_count)

    precision = true_pred_pos_count / pred_pos_count if pred_pos_count > 0 else 0
    recall = true_pred_pos_count / true_pos_count if true_pos_count > 0 else 0
    fscore = 2 / (1 / precision + 1 / recall) if (
      precision > 0 and recall > 0) else 0
    fscore = fscore * 100

    # Avoid dividing by zero when no beam size has been recorded yet.
    if beam_size_seqs:
      avg_beam_sizes = sum(beam_size_seqs) / float(len(beam_size_seqs))
    else:
      avg_beam_sizes = 0.0

    print("Epoch {} of process {}: Avg beam size: {}".format(epoch,
                                                             rank,
                                                             avg_beam_sizes))
    print("Epoch {} of process {}: Avg Fscore = {}".format(epoch,
                                                           rank,
                                                           fscore))
예제 #2
0
def train_adaptive(rank, machine, max_beam_size, shared_model, optimizer,
                   data_X, data_Y, index2word, index2label, suffix,
                   decode_method, beam_size, reward_coef_fscore,
                   reward_coef_beam_size, f_score_index_begin, args):
    """Run adaptive-beam-size training epochs for one worker and checkpoint.

    For every epoch the batches are visited in random order, encoded with
    ``machine`` and decoded with ``decode_one_sentence_adaptive_rl``.  The
    mean and standard deviation of the per-batch total reward and the
    elapsed time are printed and appended to ``log_<rank>.txt`` in
    ``args.logdir``.  After each epoch the state of ``shared_model`` and of
    the optimizer is saved to ``ckpt_<epoch>.pth`` in ``args.logdir``.

    Args:
        rank: Worker index; added to the random seed and used in the log
            file name.
        machine: Tagging model; this function uses its ``gpu``,
            ``hidden_dim``, ``encode``, ``enc2dec_hidden`` and
            ``enc2dec_cell`` members.
        max_beam_size: Passed to ``AdaptiveActorCritic`` and to the decoder.
        shared_model: Model whose parameters are optimized and checkpointed;
            passed to the decoder.
        optimizer: Optimizer to use, or ``None`` to create a local Adam one
            with learning rate ``args.lr``.
        data_X: Indexable collection of batches; each batch is a list of
            equal-length index lists (the length of the first one is taken
            as the sentence length).
        data_Y: Label batches aligned with ``data_X``.
        index2word: Not used by this function.
        index2label: Not used by this function.
        suffix: Not used by this function.
        decode_method: Only ``"adaptive"`` is supported.
        beam_size: Initial beam size given to the decoder.
        reward_coef_fscore: Passed through to the decoder.
        reward_coef_beam_size: Passed through to the decoder.
        f_score_index_begin: Passed through to the decoder.
        args: Namespace providing ``logdir``, ``lr`` and ``n_epochs``; also
            passed to the decoder.

    Raises:
        Exception: If ``decode_method`` is not ``"adaptive"``.
    """
    torch.manual_seed(123 + rank)

    # The context manager closes the log file even if training fails.
    log_path = os.path.join(args.logdir, "log_" + str(rank) + ".txt")
    with open(log_path, "w+") as logfile:
        # Create the adaptive model.
        model = AdaptiveActorCritic(max_beam_size=max_beam_size,
                                    action_space=3)
        # Fall back to a local optimizer if no shared one was passed in.
        if optimizer is None:
            optimizer = optim.Adam(shared_model.parameters(), lr=args.lr)
        # Put the module in training mode.
        model.train()

        batch_num = len(data_X)

        for epoch in range(0, args.n_epochs):
            reward_list = []

            # Visit the batches in shuffled order.
            batch_idx_list = np.random.permutation(range(batch_num))

            time_begin = time.time()
            for batch_idx in batch_idx_list:
                sen = data_X[batch_idx]
                label = data_Y[batch_idx]

                current_batch_size = len(sen)
                current_sen_len = len(sen[0])

                if current_sen_len < 3:  # ignore sentence having tiny length
                    continue

                sen_var = Variable(torch.LongTensor(sen))
                label_var = Variable(torch.LongTensor(label))

                if machine.gpu:
                    sen_var = sen_var.cuda()
                    label_var = label_var.cuda()

                # Initialize the hidden and cell states.
                # The axes semantics are
                # (num_layers * num_directions, batch_size, hidden_size),
                # so the first axis is 1 for a single-directional LSTM
                # encoder and 2 for a bi-directional LSTM encoder.
                init_enc_hidden = Variable(
                    torch.zeros((2, current_batch_size, machine.hidden_dim)))
                init_enc_cell = Variable(
                    torch.zeros((2, current_batch_size, machine.hidden_dim)))

                if machine.gpu:
                    init_enc_hidden = init_enc_hidden.cuda()
                    init_enc_cell = init_enc_cell.cuda()

                enc_hidden_seq, (enc_hidden_out, enc_cell_out) = \
                    machine.encode(sen_var, init_enc_hidden, init_enc_cell)

                # enc_hidden_out has the semantics
                # (num_layers * num_directions, batch, hidden_size) and
                # holds the hidden state for t = seq_len.  A linear layer
                # maps the two directions onto a single hidden_dim vector
                # that is used as the initial state of the decoder.
                init_dec_hidden = machine.enc2dec_hidden(
                    torch.cat([enc_hidden_out[0], enc_hidden_out[1]], dim=1))
                init_dec_cell = machine.enc2dec_cell(
                    torch.cat([enc_cell_out[0], enc_cell_out[1]], dim=1))

                if decode_method == "adaptive":
                    # The argument "beam_size" serves as the initial beam
                    # size here.
                    label_pred_seq, accum_logP_pred_seq, logP_pred_seq, \
                    attention_pred_seq, episode, sen_beam_size_seq, \
                    total_reward = decode_one_sentence_adaptive_rl(
                        machine, current_sen_len, init_dec_hidden,
                        init_dec_cell, enc_hidden_seq, beam_size,
                        max_beam_size, model, shared_model,
                        reward_coef_fscore, reward_coef_beam_size, label_var,
                        f_score_index_begin, optimizer, args)

                    reward_list.append(total_reward)
                else:
                    raise Exception("Not implemented!")
            # End for batch_idx
            time_used = time.time() - time_begin

            if reward_list:
                rewards = np.array(reward_list)
                reward_mean = np.mean(rewards)
                reward_std = np.std(rewards)
            else:
                # Every batch was skipped: report NaN directly instead of
                # triggering NumPy's empty-slice warnings.
                reward_mean = float('nan')
                reward_std = float('nan')

            log_msg = "%d\t%f\t%f\t%f" % (epoch, reward_mean, reward_std,
                                          time_used)
            print(log_msg)
            # Mirror the message into the per-worker log file; write() is
            # used instead of print(file=...) so it runs on any interpreter.
            logfile.write(log_msg + "\n")
            logfile.flush()

            # Save the shared model and the (supposedly) shared optimizer.
            # Other workers may overwrite the checkpoint of the same epoch
            # on purpose.
            checkpoint_filename = os.path.join(args.logdir,
                                               "ckpt_" + str(epoch) + ".pth")

            torch.save(
                {
                    'epoch': epoch,
                    'state_dict': shared_model.state_dict(),
                    'optimizer': optimizer.state_dict()
                }, checkpoint_filename)
        # End for epoch