Example #1
def eval(epoch, eval_type='valid'):
    # eval_type: 'valid' or 'test'

    total_log_loss, total = 0., 0

    y_logistic, y = [], []

    if eval_type == 'valid':
        loader = validloader
        E.is_split_label = False
    else:
        loader = testloader
        E.is_split_label = config.eval_metrics_split_labels

    model.eval()
    for raw_src, src, src_len, raw_tgt, tgt_vec, tgt_rnn in loader:
        if use_cuda:
            src = src.cuda()
            tgt_vec = tgt_vec.cuda()

        log_output = model(src.transpose(0, 1), src_len)

        log_loss = logit_loss(
            log_output, tgt_vec[:, :tgt_vocab.size()]) * config.logistic_weight

        total_log_loss += log_loss.item()
        total += 1

        y_logistic.append(log_output.detach().cpu().numpy())
        y.append(tgt_vec.cpu().numpy()[:, :tgt_vocab.size()])

    logging("{} log loss :{:.5f}\n".format(eval_type, total_log_loss / total))
    if eval_type == 'valid':
        logging_valid_loss([epoch, updates, total_log_loss / total])

    def get_score(y, y_score, typ):
        logging("-" * 20 + typ + '-' * 20 + '\n')
        loss_dict = E.compute(y, y_score)
        logging(E.logging(loss_dict))
        return loss_dict

    y = np.vstack(y)
    y_score = np.vstack(y_logistic)
    np.save(os.path.join(config.log, 'y_score.npy'), y_score)
    np.save(os.path.join(config.log, 'y.npy'), y)

    E.set_thres(0.5)
    get_score(y, y_score, 'Logistic')

    if eval_type == 'valid':
        global threshold
        _, threshold = E.find_best_thres(y, y_score)
    E.set_thres(threshold)
    loss_d = get_score(y, y_score, 'Logistic')

    logging('-' * 50 + '\n')

    return loss_d
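
Note: logit_loss and E are defined elsewhere in the script; since the scores are thresholded directly at 0.5, logit_loss is presumably a multi-label binary cross-entropy over per-label probabilities (e.g. nn.BCELoss). The threshold itself is tuned on the validation set via E.find_best_thres and reused for the test set. That helper is not shown; a minimal sketch of such a search, assuming micro-F1 as the selection criterion (an assumption), could look like:

import numpy as np
from sklearn.metrics import f1_score

def find_best_thres(y_true, y_score, candidates=np.arange(0.05, 0.95, 0.05)):
    # Sweep candidate thresholds on the validation scores and keep the one
    # with the best micro-F1. y_true, y_score: (n_samples, n_labels) arrays.
    best_f1, best_t = -1.0, 0.5
    for t in candidates:
        pred = (y_score >= t).astype(int)
        f1 = f1_score(y_true, pred, average='micro')
        if f1 > best_f1:
            best_f1, best_t = f1, t
    return best_f1, best_t
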
Example #2
def train(epoch):
    global updates
    model.train()
    total_log_loss, total = 0., 0

    if config.schedule:
        scheduler.step()
        print("Decaying learning rate to %g" % scheduler.get_lr()[0])

    for raw_src, src, src_len, raw_tgt, tgt_vec, tgt_rnn in trainloader:
        if use_cuda:
            src = src.cuda()
            tgt_vec = tgt_vec.cuda()
        model.zero_grad()

        log_output = model(src.transpose(0, 1), src_len)

        log_loss = logit_loss(
            log_output, tgt_vec[:, :tgt_vocab.size()]) * config.logistic_weight

        losses = log_loss
        losses.backward()
        optim.step()

        total_log_loss += log_loss.item()
        total += 1
        updates += 1

        if updates % config.print_interval == 0:
            logging(time.strftime("[%H:%M:%S]", time.localtime()))
            logging(" Epoch: %3d, updates: %8d\n" % (epoch, updates))
            logging("Log loss : {:.5f}\n".format(total_log_loss / total))
            logging_train_loss([epoch, updates, total_log_loss / total])
            total_log_loss, total = 0., 0

        if updates % config.eval_interval == 0:
            ## TODO: different models will have different decoding strategies
            score = eval(epoch, 'valid')
            logging_metric(score, epoch, updates)
            for metric, value in score.items():
                scores[metric].append(score[metric])
                if metric == standard_metric and score[metric] >= max(
                        scores[metric]):
                    save_model(log_path + 'best_' + metric + '_checkpoint.pt')
            save_model(log_path + 'checkpoint.pt')

            model.train()
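
Note: save_model is defined elsewhere in the script. A plausible sketch, assuming the usual state-dict checkpointing (the exact checkpoint contents are an assumption):

import torch

def save_model(path):
    # Persist everything needed to resume training: model and optimizer
    # state plus the global update counter used by train() above.
    torch.save({
        'model': model.state_dict(),
        'optim': optim.state_dict(),
        'updates': updates,
    }, path)
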
Example #3
def eval(epoch, decode_type='greedy', logistic_joint_decoding=False):
    # decode_type: 'greedy' or 'beam_search'

    total_rnn_loss, total_log_loss, total = 0., 0., 0.

    y_logistic, y_rnn, y_rescore, y = [], [], [], []

    if decode_type == 'beam_search':
        eval_model = AttEnc_DecRNN.AttEnc_DecRNN(
            encoder, top_k_decoder.TopKDecoder(model.decoder,
                                               config.beam_size))
    elif decode_type == 'greedy':
        model.decoder.set_sampling_type('max')
        eval_model = model

    eval_model.eval()
    for items, label_set_rnn, label_set_vec in testloader:
        items = items.float()
        if use_cuda:
            items = items.cuda()
            label_set_rnn = label_set_rnn.cuda()
            label_set_vec = label_set_vec.cuda()

        decoder_outputs, decoder_hidden, ret_dict, log_output = eval_model(
            items, logistic_joint_decoding=logistic_joint_decoding)

        if config.loss_type.lower() == 'vanilla':
            label_sets = label_set_rnn
        else:
            label_sets = label_set_vec

        rnn_loss = Loss(decoder_outputs, ret_dict['sequence'], label_sets)
        log_loss = logit_loss(
            log_output,
            label_set_vec[:, :config.label_set_size]) * config.logistic_weight

        total_log_loss += log_loss.item()
        total_rnn_loss += rnn_loss.item()
        total += 1

        y_vec = E.idx2vec(ret_dict['sequence'], config.label_set_size,
                          config.label_set_size + 1, True)
        y_rnn.append(y_vec)
        y_logistic.append(log_output.detach().cpu().numpy())
        y.append(label_set_vec.cpu().numpy()[:, :config.label_set_size])

        if decode_type == 'beam_search':
            seq, score = rescore.logistic_rescore(ret_dict['topk_sequence'],
                                                  log_output)
            y_vec = E.idx2vec(seq, config.label_set_size,
                              config.label_set_size + 1, True)
            y_rescore.append(y_vec)
    logging("Decode type: {} , Logistic joint Decoding: {}\n".format(
        decode_type, logistic_joint_decoding))
    logging("Test RNN loss : {:.5f}  \nLog loss :{:.5f}\n".format(
        total_rnn_loss / total, total_log_loss / total))

    E.set_thres(0.5)

    def get_score(y, y_score, typ):
        y_np = np.vstack(y)
        y_score_np = np.vstack(y_score)
        logging("-" * 20 + typ + '-' * 20 + '\n')
        loss_dict = E.compute(y_np, y_score_np)
        logging(E.logging(loss_dict))
        return loss_dict

    loss_d = get_score(y, y_rnn, 'RNN')

    get_score(y, y_logistic, 'Logistic')
    if decode_type == 'beam_search':
        get_score(y, y_rescore, 'Logistic Rescore')

    logging('-' * 50 + '\n')

    return loss_d
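
Note: rescore.logistic_rescore is not shown. A hypothetical sketch of the idea, assuming topk_sequence arrives as a (batch, k, seq_len) LongTensor of label indices (with indices >= the label-set size acting as EOS/PAD) and log_output as per-label probabilities; each beam candidate is re-ranked by the mean log-probability the logistic head assigns to its labels:

import torch

def logistic_rescore(topk_sequences, label_probs):
    # Score each candidate by the mean log-probability of its labels under
    # the logistic head, then return the best sequence and score per example.
    n_labels = label_probs.size(1)
    logp = torch.log(label_probs.clamp(min=1e-8))
    batch, k, seq_len = topk_sequences.shape
    idx = topk_sequences.clamp(max=n_labels - 1).reshape(batch, -1)
    gathered = logp.gather(1, idx).reshape(batch, k, seq_len)
    mask = (topk_sequences < n_labels).float()  # ignore EOS/PAD positions
    scores = (gathered * mask).sum(-1) / mask.sum(-1).clamp(min=1)
    best = scores.argmax(dim=1)
    rows = torch.arange(batch, device=scores.device)
    return topk_sequences[rows, best], scores[rows, best]
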
Example #4
def train(epoch):
    model.train()
    model.decoder.set_sampling_type(config.decoder_sampling_type)
    global updates

    if config.schedule:
        scheduler.step()
        print("Decaying learning rate to %g" % scheduler.get_lr()[0])

    if config.loss_type.lower() == 'vanilla':
        if epoch > config.teacher_forcing_final_epoch:
            teacher_forcing_ratio = config.teacher_forcing_ratio_end
        else:
            teacher_forcing_ratio = config.teacher_forcing_ratio_start + (config.teacher_forcing_ratio_end - config.teacher_forcing_ratio_start) \
                / config.teacher_forcing_final_epoch * (epoch-1)
        logging("Teacher forcing ratio: " + str(teacher_forcing_ratio) + '\n')
    else:
        teacher_forcing_ratio = 0

    total_log_loss, total_rnn_loss, total = 0., 0., 0

    for items, label_set_rnn, label_set_vec in trainloader:
        items = items.float()
        if use_cuda:
            items = items.cuda()
            label_set_rnn = label_set_rnn.cuda()
            label_set_vec = label_set_vec.cuda()

        model.zero_grad()

        target_variable = None
        candidates = None
        label_sets = label_set_vec.clone()

        if config.loss_type.lower() == 'vanilla':
            target_variable = label_set_rnn
            label_sets = label_set_rnn
        elif config.loss_type.lower() == 'order_free':
            candidates = label_set_vec.clone()

        decoder_outputs, decoder_hidden, ret_dict, log_output = model(
            items,
            target_variable=target_variable,
            candidates=candidates,
            teacher_forcing_ratio=teacher_forcing_ratio)
        rnn_loss = Loss(decoder_outputs, ret_dict['sequence'], label_sets)
        log_loss = logit_loss(
            log_output,
            label_set_vec[:, :config.label_set_size]) * config.logistic_weight
        losses = rnn_loss + log_loss
        losses.backward()
        optim.step()
        total_log_loss += log_loss.item()
        total_rnn_loss += rnn_loss.item()

        total += 1
        updates += 1

        if updates % 1000 == 0:
            logging(time.strftime("[%H:%M:%S]", time.localtime()))
            logging(" Epoch: %3d, updates: %8d\n" % (epoch, updates))
            logging("Training loss : {:.5f} \nLog loss : {:.5f}\n".format(
                total_rnn_loss / total, total_log_loss / total))

    logging(time.strftime("[%H:%M:%S]", time.localtime()))
    logging(" Epoch: %3d, updates: %8d\n" % (epoch, updates))
    logging("Training loss : {:.5f} \nLog loss : {:.5f}\n".format(
        total_rnn_loss / total, total_log_loss / total))

    if config.loss_type.lower() == 'ocd':
        Loss.update_temperature(epoch)
    score = eval(epoch, 'greedy', False)
    score = eval(epoch, 'greedy', True)
    #score_bs =  eval(epoch, 'beam_search', config.logistic_joint_decoding)

    for metric, value in score.items():
        scores[metric].append(score[metric])
        if metric == standard_metric and score[metric] >= max(scores[metric]):
            save_model(log_path + 'best_' + metric + '_checkpoint.pt')
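
Note: the teacher-forcing ratio above is annealed linearly, starting from config.teacher_forcing_ratio_start at epoch 1 and held at config.teacher_forcing_ratio_end once the epoch exceeds config.teacher_forcing_final_epoch. The same schedule, restated as a standalone helper:

def teacher_forcing_ratio_at(epoch, start, end, final_epoch):
    # Linear anneal: starts at `start` for epoch 1, moves toward `end` by
    # (end - start) / final_epoch per epoch, then holds at `end`.
    if epoch > final_epoch:
        return end
    return start + (end - start) / final_epoch * (epoch - 1)

# e.g. start=1.0, end=0.5, final_epoch=10: 1.0 at epoch 1, 0.75 at epoch 6,
# and 0.5 from epoch 11 onward.
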
Example #5
def eval(epoch, eval_type='valid', decode_type='greedy',
         logistic_joint_decoding=False):
    # decode_type: 'greedy' or 'beam_search'

    total_rnn_loss, total_log_loss, total = 0., 0., 0.

    y_logistic, y_rnn, y_rescore, y = [], [], [], []
    
    if eval_type == 'valid':
        loader = validloader
        E.is_split_label = False
    else:
        loader = testloader
        E.is_split_label = config.eval_metrics_split_labels

    model.decoder.set_sampling_type('max')
    if decode_type == 'beam_search':
        topk_decoder = top_k_decoder.TopKDecoder(model.decoder, config.beam_size,
                                                 config.beam_score_type)
        eval_model = seq2seq.Seq2seq(encoder, topk_decoder, decoderFC)
    elif decode_type == 'greedy':
        eval_model = model

    eval_model.eval()
    for raw_src, src, src_len, raw_tgt, tgt_vec, tgt_rnn in loader:
        if use_cuda:
            src = src.cuda()
            tgt_vec = tgt_vec.cuda()
            tgt_rnn = tgt_rnn.cuda()
        
        decoder_outputs, decoder_hidden, ret_dict, log_output = eval_model(
            src.transpose(0, 1), src_len,
            logistic_joint_decoding=logistic_joint_decoding)
        
        if config.loss_type.lower() == 'vanilla':
            label_sets = tgt_rnn
        else:
            label_sets = tgt_vec.clone()

        rnn_loss = Loss(decoder_outputs, ret_dict['sequence'], label_sets)
        log_loss = logit_loss(
            log_output, tgt_vec[:, :tgt_vocab.size()]) * config.logistic_weight
        
        total_log_loss += log_loss.item()
        total_rnn_loss += rnn_loss.item()
        total += 1

        y_vec = E.idx2vec(ret_dict['sequence'], tgt_vocab.size(),
                          tgt_vocab.size() + 1, True)
        y_rnn.append(y_vec)
        y_logistic.append(log_output.detach().cpu().numpy())
        y.append(tgt_vec.cpu().numpy()[:, :tgt_vocab.size()])

        if decode_type == 'beam_search':
            seq, score = rescore.logistic_rescore(ret_dict['topk_sequence'],
                                                  log_output)
            y_vec = E.idx2vec(seq, tgt_vocab.size(),
                              tgt_vocab.size() + 1, True)
            y_rescore.append(y_vec)
    logging("Decode type: {} , Logistic joint Decoding: {}\n".format(decode_type, logistic_joint_decoding))
    logging("{} RNN loss : {:.5f}  \nLog loss :{:.5f}\n".format(eval_type, total_rnn_loss / total, total_log_loss / total))
    
    if eval_type == 'valid' and logistic_joint_decoding is False:
        logging_valid_loss(
            [epoch, updates, total_log_loss / total, total_rnn_loss / total])
    
    y_np = np.vstack(y)
    y_logistic_np = np.vstack(y_logistic)
    y_rnn_np = np.vstack(y_rnn)
    
    E.set_thres(0.5)
    
    def get_score(y_np, y_score_np, typ):
        logging("-"*20 + typ + '-'*20 + '\n')
        loss_dict = E.compute(y_np, y_score_np)
        logging(E.logging(loss_dict))
        return loss_dict
    
    score_rnn = get_score(y_np, y_rnn_np, 'RNN')
    get_score(y_np, y_logistic_np, 'Logistic')
    
    ## Tune the decision threshold on the validation set; reuse it at test time
    if eval_type == 'valid':
        global threshold
        _, threshold = E.find_best_thres(y_np, y_logistic_np)
    E.set_thres(threshold)
    score_logistic = get_score(y_np, y_logistic_np, 'Logistic')
    
    score_rescore = None
    if decode_type == 'beam_search':
        y_rescore_np = np.vstack(y_rescore)
        score_rescore = get_score(y_np, y_rescore_np, 'Logistic Rescore')
    
    logging('-' * 50 + '\n')
    
    return score_rnn, score_logistic
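
Note: E.idx2vec converts decoded label-index sequences into multi-hot vectors for the evaluator. The helper is not shown; a plausible sketch, assuming the sequences arrive as one iterable of indices per example, the third argument is the EOS/PAD index, and the fourth toggles skipping out-of-range indices (all assumptions):

import numpy as np

def idx2vec(sequences, n_labels, eos_idx, skip_out_of_range=True):
    # Turn a batch of decoded index sequences into a (batch, n_labels)
    # multi-hot matrix, ignoring EOS/PAD and, optionally, any index that
    # falls outside the label set.
    vec = np.zeros((len(sequences), n_labels), dtype=np.float32)
    for i, seq in enumerate(sequences):
        for idx in seq:
            idx = int(idx)
            if idx == eos_idx or (skip_out_of_range and idx >= n_labels):
                continue
            vec[i, idx] = 1.0
    return vec
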
Example #6
def train(epoch):
    global updates
    model.train()
    model.decoder.set_sampling_type(config.decoder_sampling_type)
    total_log_loss, total_rnn_loss, total = 0., 0., 0
    #optim.updateLearningRate(None, epoch)
    if config.schedule:
        scheduler.step()
        print("Decaying learning rate to %g" % scheduler.get_lr()[0])
    
    ## Update Teacher Forcing ratio
    if config.loss_type.lower() in ('vanilla', 'order_free'):
        if epoch > config.teacher_forcing_final_epoch:
            teacher_forcing_ratio = config.teacher_forcing_ratio_end 
        else:
            teacher_forcing_ratio = config.teacher_forcing_ratio_start + (config.teacher_forcing_ratio_end - config.teacher_forcing_ratio_start) \
                / config.teacher_forcing_final_epoch * (epoch-1)
        logging("Teacher forcing ratio: " + str(teacher_forcing_ratio) + '\n')
    else:
        teacher_forcing_ratio = 0
    # Update Temperature
    if config.loss_type.lower() == 'ocd':
        Loss.update_temperature(epoch)
    
    for raw_src, src, src_len, raw_tgt, tgt_vec, tgt_rnn in trainloader:
        if use_cuda:
            src = src.cuda()
            tgt_vec = tgt_vec.cuda()
            tgt_rnn = tgt_rnn.cuda()
        model.zero_grad()
        
        target_variable = None
        candidates = tgt_vec.clone()
        label_sets = tgt_vec.clone()
        
        if config.loss_type.lower() == 'vanilla':
            target_variable = tgt_rnn
            label_sets = tgt_rnn
        
        decoder_outputs, decoder_hidden, ret_dict, log_output = model(
            src.transpose(0, 1), src_len,
            target_variable=target_variable,
            candidates=candidates,
            teacher_forcing_ratio=teacher_forcing_ratio)

        rnn_loss = Loss(decoder_outputs, ret_dict['sequence'], label_sets)  
        
        log_loss = logit_loss(
            log_output, tgt_vec[:, :tgt_vocab.size()]) * config.logistic_weight

        losses = rnn_loss + log_loss
        losses.backward()
        optim.step()

        total_log_loss += log_loss.item()
        total_rnn_loss += rnn_loss.item()
        total += 1
        updates += 1

        if updates % config.print_interval == 0:
            logging(time.strftime("[%H:%M:%S]", time.localtime()))
            logging(" Epoch: %3d, updates: %8d\n" % (epoch, updates))
            logging("RNN loss : {:.5f} \nLog loss : {:.5f}\n".format(total_rnn_loss / total, total_log_loss / total))
            logging_train_loss([epoch,updates,total_log_loss / total, total_rnn_loss / total])
            total_log_loss, total_rnn_loss, total = 0., 0., 0
        
        if updates % config.eval_interval == 0:
            ## TODO: different models will have different decoding strategies
            score_rnn, score_logistic = eval(epoch, 'valid', 'greedy', False)
            logging_metric(score_rnn, epoch, updates)
            logging_metric_logistic(score_logistic, epoch, updates)
            if config.logistic_weight > 0:
                score_joint, _ = eval(epoch, 'valid', 'beam_search', True)
                logging_metric_joint(score_joint , epoch, updates)
            score = score_rnn
            #eval(epoch, 'test', 'greedy', True)
            for metric, value in score.items():
                scores[metric].append(score[metric])
                if metric == standard_metric and score[metric] >= max(
                        scores[metric]):
                    save_model(log_path + 'best_' + metric + '_checkpoint.pt')
            save_model(log_path + 'checkpoint.pt')
    
            model.train()
            model.decoder.set_sampling_type(config.decoder_sampling_type)
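
Note: the logistic_joint_decoding flag is handled inside the model, which is not shown. A hypothetical sketch of one way such joint decoding could work, biasing each decoding step's distribution by the logistic head's per-label probabilities (the actual mechanism in this codebase is an assumption):

import torch

def joint_step_distribution(step_logits, label_probs):
    # step_logits: (batch, vocab) raw decoder scores for one decoding step,
    # where the first n_labels columns correspond to labels.
    # label_probs: (batch, n_labels) probabilities from the logistic head.
    n_labels = label_probs.size(1)
    step_logp = torch.log_softmax(step_logits, dim=-1)
    bias = torch.zeros_like(step_logp)
    bias[:, :n_labels] = torch.log(label_probs.clamp(min=1e-8))
    return step_logp + bias  # combined log-scores; argmax or beam over these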