Example 1
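# Assumed imports (not shown in the original snippet): standard library plus
# torch/fitlog modules used below. Project-local helpers (preprocess,
# load_data, JointEncoderModel, Optim, batch_filter, batch_spliter,
# eval_model, write_joint_data, postprocess) are assumed to come from the
# surrounding package.
import os
import time

import fitlog
import torch
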
def main():
    # ====== preprocess ====== #
    args = preprocess()

    # ====== Loading dataset ====== #
    train_data, dev_data, test_data, joint_vocabs, parsing_vocabs = load_data(
        args.joint_input, args.parsing_input, args.batch_size,
        args.accum_steps, args.shuffle, args.num_workers, args.drop_last)
    # cross_labels_idx = generate_cross_labels_idx(vocabs['labels'])

    # ======= Preparing Model ======= #
    print("\nModel Preparing starts...")
    model = JointEncoderModel(
        joint_vocabs,
        parsing_vocabs,
        # cross_labels_idx,
        # Embedding
        args.subword,
        args.use_pos_tag,
        args.bert_path,
        args.transliterate,
        args.d_model,
        args.partition,
        args.pos_tag_emb_dropout,
        args.position_emb_dropout,
        args.bert_emb_dropout,
        args.emb_dropout,
        # Encoder
        args.layer_num,
        args.hidden_dropout,
        args.attention_dropout,
        args.dim_ff,
        args.nhead,
        args.kqv_dim,
        # classifier
        args.label_hidden,
        # loss
        args.lambda_scaler,
        args.alpha_scaler,
        args.language,
        args.device).cuda()
    # print(model, end='\n\n\n')
    optimizer = Optim(model, args.optim, args.lr, args.lr_fine_tune,
                      args.warmup_steps, args.lr_decay_factor,
                      args.weight_decay, args.clip_grad,
                      args.clip_grad_max_norm)
    optimizer.zero_grad()
    # if args.freeze_bert:
    #     optimizer.set_freeze_by_idxs([str(num) for num in range(0, config.freeze_bert_layers)], True)
    #     optimizer.free_embeddings()
    #     optimizer.freeze_pooler()
    #     print('freeze model of BERT %d layers' % config.freeze_bert_layers)

    # ========= Training ========= #
    print('Training starts...')
    start = time.time()
    steps, loss_value, total_batch_size = 1, 0., 0
    best_dev, best_test = None, None
    patience = args.patience
    for epoch_i in range(1, args.epoch + 1):  # 1-indexed, runs args.epoch epochs
        for batch_i, insts in enumerate(train_data, start=1):
            model.train()

            insts, batch_size, max_len = batch_filter(
                insts, args.language, args.DATASET_MAX_SNT_LENGTH)
            insts_list = batch_spliter(insts, max_len,
                                       args.BATCH_MAX_SNT_LENGTH)
            total_batch_size += batch_size
            for insts in insts_list:
                loss = model(insts)
                if loss.item() > 0.:
                    loss.backward()
                    loss_value += loss.item()
                    assert not isinstance(loss_value,
                                          torch.Tensor), 'GPU memory leak'

            if batch_i == args.accum_steps and not args.debug:
                args.visual_logger.visual_histogram(model,
                                                    steps // args.accum_steps)
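            # Gradient accumulation: step and clear gradients only every
            # accum_steps batches, emulating a larger effective batch size.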
            if steps % args.accum_steps == 0:
                optimizer.step()
                optimizer.zero_grad()
            if steps % (args.accum_steps * args.log_interval) == 0:
                print('[%d/%d], [%d/%d] Loss: %.05f' %
                      (epoch_i, args.epoch, batch_i // args.accum_steps,
                       len(train_data) // args.accum_steps,
                       loss_value / total_batch_size),
                      flush=True)
                visual_dic = {
                    'loss/train': loss_value,
                    'lr': optimizer.get_lr()[0]
                }
                if args.clip_grad:
                    visual_dic['norm'] = optimizer.get_dynamic_gard_norm()
                if not args.debug:
                    args.visual_logger.visual_scalars(
                        visual_dic, steps // args.accum_steps)
                loss_value, total_batch_size = 0., 0
                torch.cuda.empty_cache()
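            # Periodic evaluation: track the best dev parsing F-score and
            # checkpoint/dump predictions whenever it improves.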
            if steps % (args.accum_steps * args.eval_interval) == 0:
                print('model evaluating starts...', flush=True)
                joint_fscore_dev, res_data_dev = eval_model(
                    model, dev_data, args.language,
                    args.DATASET_MAX_SNT_LENGTH, args.BATCH_MAX_SNT_LENGTH,
                    args.evalb_path, 'dev')
                joint_fscore_test, res_data_test = eval_model(
                    model, test_data, args.language,
                    args.DATASET_MAX_SNT_LENGTH, args.BATCH_MAX_SNT_LENGTH,
                    args.evalb_path, 'test')
                visual_dic = {
                    'F/parsing_dev': joint_fscore_dev.parsing_f,
                    'F/parsing_test': joint_fscore_test.parsing_f,
                    'F/ner_dev': joint_fscore_dev.ner_f,
                    'F/ner_test': joint_fscore_test.ner_f
                }
                if not args.debug:
                    args.visual_logger.visual_scalars(
                        visual_dic, steps // args.accum_steps)
                if best_dev is None or joint_fscore_dev.parsing_f > best_dev.parsing_f:
                    best_dev, best_test = joint_fscore_dev, joint_fscore_test
                    fitlog.add_best_metric({
                        'parsing_f_dev': best_dev.parsing_f,
                        'ner_f_test': best_test.ner_f
                    })
                    patience = args.patience
                    write_joint_data(args.save_path, res_data_dev, 'dev')
                    write_joint_data(args.save_path, res_data_test, 'test')
                    if args.save:
                        torch.save(
                            model.pack_state_dict(),
                            os.path.join(args.save_path,
                                         args.name + '.best.model.pt'))
                print('best performance:\ndev: %s\ntest: %s' %
                      (best_dev, best_test))
                print('model evaluating ends...', flush=True)
                del res_data_dev, res_data_test
                if args.debug:
                    exit(0)
            steps += 1

        if args.early_stop:
            patience -= 1
            if patience < 0:
                print('early stop')
                break

    # ====== postprocess ====== #
    postprocess(args, start)
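
# Entry-point guard (assumed; not part of the original snippet) so the
# script can be run directly.
if __name__ == '__main__':
    main()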
Example 2
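# Assumed imports, as in Example 1: stdlib plus torch; parse_args, load_data,
# load_pretrained_embeddings, ParaNNTranSegmentor, Optim, VisualLogger,
# cal_preformance and eval_model are project-local helpers.
import os
import sys
import time

import torch
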
def main():
    config = parse_args()

    # ========= Loading Dataset ========= #
    print(config)
    print("Loading dataset starts...")
    train_data, dev_data, test_data, train_dataset = load_data(config)
    print('\n\n', end='')

    # ========= Preparing Model ========= #
    print("Preparing Model starts...")
    if config.use_cuda and torch.cuda.is_available():
        config.device = torch.device('cuda:' + str(config.cuda_id))
        print('You will train model in cuda: %d.\n' % config.device.index)
    else:
        config.device = torch.device('cpu')
        print('GPU is not available, use CPU default.\n')

    pretra_char_embed, pretra_bichar_embed = load_pretrained_embeddings(
        train_dataset, config)
    model = ParaNNTranSegmentor(
        pretra_char_embed, train_dataset.get_char_vocab_size(),
        config.char_embed_dim, config.char_embed_dim_no_static,
        config.char_embed_max_norm, pretra_bichar_embed,
        train_dataset.get_bichar_vocab_size(), config.bichar_embed_dim,
        config.bichar_embed_dim_no_static, config.bichar_embed_max_norm,
        config.dropout_embed, config.encoder_embed_dim,
        config.dropout_encoder_embed, config.encoder_lstm_hid_size,
        config.dropout_encoder_hid, config.subword_lstm_hid_size,
        config.word_lstm_hid_size, config.device)
    if config.use_cuda and torch.cuda.is_available():
        model.to(config.device)
    print(model, end='\n\n\n')

    criterion = torch.nn.CrossEntropyLoss(reduction='sum').to(config.device)
    optimizer = Optim(config.opti_name, config.learning_rate,
                      config.weight_decay, model, config)
    visual_logger = VisualLogger(config.visual_logger_path)

    # ========= Training ========= #
    print('Training starts...')
    start = time.time()
    total_loss, golds_words, pred_words, seg_words, chars, cor_chars, steps = 0.0, 0, 0, 0, 0, 0, 1
    best_perf = [0, 0, 0., 0.]  # (epoch_idx, batch_idx, F_dev, F_test)
    for epoch_i in range(config.epoch):
        for batch_i, (insts, golds) in enumerate(train_data):
            insts = [x.to(config.device) for x in insts]
            golds = golds.to(config.device)
            model.train()

            optimizer.zero_grad()
            pred = model(insts, golds)
            loss, golds_word, pred_word, seg_word, char, cor_char = cal_preformance(
                pred, golds, criterion, config.device)
            total_loss += loss.item()
            golds_words += golds_word
            pred_words += pred_word
            seg_words += seg_word
            chars += char
            cor_chars += cor_char

            loss.backward()
            optimizer.step()

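            # Periodic logging of average loss and segmentation P/R/F.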
            if steps % config.logInterval == 0:
                avg_loss = total_loss / chars
                # guard against zero denominators early in training
                P = seg_words / pred_words if pred_words else 0.
                R = seg_words / golds_words if golds_words else 0.
                F = (2 * P * R) / (P + R) if P + R else 0.
                print(
                    '[%d/%d], [%d/%d] Loss: %.05f, F: %.05f, P: %.05f, R: %.05f'
                    % (epoch_i + 1, config.epoch, batch_i + 1, len(train_data),
                       avg_loss, F, P, R))
                sys.stdout.flush()
                scal = {
                    'Loss': avg_loss,
                    'F': F,
                    'P': P,
                    'R': R,
                    'lr': optimizer.get_lr()[0]
                }
                visual_logger.visual_scalars(scal, steps, 'train')
                total_loss, golds_words, pred_words, seg_words, chars, cor_chars = 0.0, 0, 0, 0, 0, 0
                # break
            if steps % config.valInterval == 0:
                F_dev, F_test = eval_model(model, criterion, dev_data,
                                           test_data, config.device,
                                           visual_logger, steps)
                if F_dev > best_perf[2]:
                    best_perf = [epoch_i + 1, batch_i + 1, F_dev, F_test]
                print(
                    'best performance: [%d/%d], [%d/%d], F_dev: %.05f, F_test: %.05f.'
                    % (best_perf[0], config.epoch, best_perf[1],
                       len(train_data), best_perf[2], best_perf[3]))
                sys.stdout.flush()
            if steps % config.visuParaInterval == 1:
                visual_logger.visual_histogram(model, steps)
            if steps % config.saveInterval == 0:
                os.makedirs(config.save_path, exist_ok=True)
                filename = '%d.model' % steps
                modelpath = os.path.join(config.save_path, filename)
                torch.save(model, modelpath)
            steps += 1
    exe_time = int(time.time() - start)
    print('Executing time: %dh:%dm:%ds.' %
          (exe_time // 3600, (exe_time // 60) % 60, exe_time % 60))
    visual_logger.close()
    print('Training ends.')
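
# Assumed entry point, as in Example 1.
if __name__ == '__main__':
    main()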
Example 3
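# Assumed imports, as in Example 2: stdlib plus torch; parse_args, set_seed,
# load_data, Bert3Gram, Optim, VisualLogger, cal_preformance and eval_model
# are project-local helpers.
import os
import sys
import time

import torch
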
def main():
    config = parse_args()
    set_seed(config.seed)

    # ========= Loading Dataset ========= #
    print(config)
    print("Loading dataset starts...")
    train_data, dev_data, test_data, train_dataset = load_data(config)
    print('\n\n', end='')

    # ========= Preparing Model ========= #
    print("Preparing Model starts...")
    if config.use_cuda and torch.cuda.is_available():
        config.device = torch.device('cuda:' + str(config.cuda_id))
        print('You will train model in cuda: %d.\n' % config.device.index)
    else:
        config.device = torch.device('cpu')
        print('GPU is not available, use CPU default.\n')

    model = Bert3Gram(config.device, config.cache_3gram_path)
    if config.use_cuda and torch.cuda.is_available():
        model.to(config.device)
    print(model, end='\n\n\n')

    criterion = torch.nn.CrossEntropyLoss(reduction='sum').to(config.device)
    optimizer = Optim(model, config)
    visual_logger = VisualLogger(config.visual_logger_path)

    # ========= Training ========= #
    print('Training starts...')
    start = time.time()
    total_loss, golds_words, pred_words, seg_words, chars, cor_chars, steps = 0.0, 0, 0, 0, 0, 0, 1
    best_perf = [0, 0, 0., 0.]  # (epoch_idx, batch_idx, F_dev, F_test)
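    # Optionally freeze the bottom freeze_bert_layers of BERT (the helper
    # also adjusts the embeddings and pooler) so only the remaining layers
    # are fine-tuned.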
    if config.freeze_bert:
        optimizer.set_freeze_by_idxs(
            [str(num) for num in range(0, config.freeze_bert_layers)], True)
        optimizer.free_embeddings()
        optimizer.freeze_pooler()
    for epoch_i in range(config.epoch):
        for batch_i, (insts, golds) in enumerate(train_data):
            golds = golds.to(config.device)
            model.train()

            pred = model(insts, golds)
            loss, golds_word, pred_word, seg_word, char, cor_char = cal_preformance(
                pred, golds, criterion, config.device)
            total_loss += loss.item()
            golds_words += golds_word
            pred_words += pred_word
            seg_words += seg_word
            chars += char
            cor_chars += cor_char

            loss.backward()

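            # Gradients are accumulated across accumulation_steps batches
            # before each optimizer step.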
            if steps % config.accumulation_steps == 0:
                optimizer.step()
                optimizer.zero_grad()
                torch.cuda.empty_cache()
            if steps % config.logInterval == 0:
                avg_loss = total_loss / chars
                # guard against zero denominators early in training
                P = seg_words / pred_words if pred_words else 0.
                R = seg_words / golds_words if golds_words else 0.
                F = (2 * P * R) / (P + R) if P + R else 0.
                print(
                    '[%d/%d], [%d/%d] Loss: %.05f, F: %.05f, P: %.05f, R: %.05f'
                    % (epoch_i + 1, config.epoch, batch_i + 1, len(train_data),
                       avg_loss, F, P, R))
                sys.stdout.flush()
                scal = {
                    'Loss': avg_loss,
                    'F': F,
                    'P': P,
                    'R': R,
                    'lr': optimizer.get_lr()[0]
                }
                visual_logger.visual_scalars(scal, steps, 'train')
                total_loss, golds_words, pred_words, seg_words, chars, cor_chars = 0.0, 0, 0, 0, 0, 0
                # break
            if steps % config.valInterval == 0:
                F_dev, F_test = eval_model(model, criterion, dev_data,
                                           test_data, config.device,
                                           visual_logger, steps)
                if F_dev > best_perf[2]:
                    best_perf = [epoch_i + 1, batch_i + 1, F_dev, F_test]
                print(
                    'best performance: [%d/%d], [%d/%d], F_dev: %.05f, F_test: %.05f.'
                    % (best_perf[0], config.epoch, best_perf[1],
                       len(train_data), best_perf[2], best_perf[3]))
                sys.stdout.flush()
                optimizer.zero_grad()
                torch.cuda.empty_cache()
                # torch.save(model.pack_state_dict(), os.path.join(config.save_path, 'cnn.pt'))
            if steps % config.visuParaInterval == 1:
                visual_logger.visual_histogram(model, steps)
            if steps % config.saveInterval == 0:
                os.makedirs(config.save_path, exist_ok=True)
                filename = '%d.model' % steps
                modelpath = os.path.join(config.save_path, filename)
                torch.save(model, modelpath)
            steps += 1
    exe_time = int(time.time() - start)
    print('Executing time: %dh:%dm:%ds.' %
          (exe_time // 3600, (exe_time // 60) % 60, exe_time % 60))
    visual_logger.close()
    print('Training ends.')
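
# Assumed entry point, as in the previous examples.
if __name__ == '__main__':
    main()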
Example 4
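# Assumed imports, as in Example 1: stdlib plus torch; preprocess, load_data,
# PretrainModel, Optim, batch_filter, batch_spliter, eval_model and
# postprocess are project-local helpers.
import os
import time

import torch
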
def main():
    # ====== preprocess ====== #
    args = preprocess()
    # ====== Loading dataset ====== #
    train_data, dev_data, subtree_vocab, token_vocab = load_data(
        args.input, args.batch_size, args.language, args.subword, args.debug)

    # ======= Preparing Model ======= #
    print("\nModel Preparing starts...")
    model = PretrainModel(
        subtree_vocab,
        token_vocab,
        # Embedding
        args.subword,
        args.bert,
        args.transliterate,
        args.d_model,
        args.partition,
        args.position_emb_dropout,
        args.bert_emb_dropout,
        args.emb_dropout,
        args.layer_num,
        args.hidden_dropout,
        args.attention_dropout,
        args.dim_ff,
        args.nhead,
        args.kqv_dim,
        args.label_hidden,
        # classifier
        args.language,
        args.device).cuda()
    # print(model, end='\n\n\n')
    optimizer = Optim(model, args.optim, args.lr, args.lr_fine_tune,
                      args.warmup_steps, args.lr_decay_factor,
                      args.weight_decay, args.clip_grad,
                      args.clip_grad_max_norm)
    optimizer.zero_grad()
    # if args.freeze_bert:
    #     optimizer.set_freeze_by_idxs([str(num) for num in range(0, config.freeze_bert_layers)], True)
    #     optimizer.free_embeddings()
    #     optimizer.freeze_pooler()
    #     print('freeze model of BERT %d layers' % config.freeze_bert_layers)

    # ========= Training ========= #
    print('Training starts...')
    start = time.time()
    steps, loss_value, total_batch_size = 1, 0., 0
    best_dev = 0.
    patience = args.patience
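    # Running counters for the three pretraining objectives:
    # subtree prediction, head prediction, and masked LM.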
    total_subtree, tp_subtree = 0, 0
    total_head, tp_head = 0, 0
    total_mask_lm, tp_mask_lm = 0, 0
    for epoch_i in range(1, args.epoch + 1):
        for batch_i, insts in enumerate(train_data, start=1):
            model.train()

            insts, batch_size, max_len = batch_filter(
                insts, args.DATASET_MAX_SNT_LENGTH)
            insts_list = batch_spliter(insts, max_len,
                                       args.BATCH_MAX_SNT_LENGTH)
            total_batch_size += batch_size
            for insts in insts_list:
                loss, b_s, b_s_tp, b_h, b_h_tp, b_m, b_m_tp = model(insts)
                total_subtree += b_s
                tp_subtree += b_s_tp
                total_head += b_h
                tp_head += b_h_tp
                total_mask_lm += b_m
                tp_mask_lm += b_m_tp
                if loss.item() > 0.:
                    loss.backward()
                    loss_value += loss.item()
                    assert not isinstance(loss_value,
                                          torch.Tensor), 'GPU memory leak'

            if steps % args.accum_steps == 0:
                optimizer.step()
                optimizer.zero_grad()
            if steps % (args.accum_steps * args.log_interval) == 0:
                print(
                    '[%d/%d], [%d/%d] Loss: %.05f, subtree_acc: %.03f, head_acc: %.03f, mask_lm: %.03f, total_acc: %.03f'
                    % (epoch_i, args.epoch, batch_i // args.accum_steps,
                       len(train_data) // args.accum_steps, loss_value /
                       total_batch_size, tp_subtree / total_subtree * 100,
                       tp_head / total_head * 100, tp_mask_lm / total_mask_lm *
                       100, (tp_subtree + tp_head + tp_mask_lm) /
                       (total_subtree + total_head + total_mask_lm) * 100),
                    flush=True)
                loss_value, total_batch_size = 0., 0
                total_subtree, tp_subtree = 0, 0
                total_head, tp_head = 0, 0
                total_mask_lm, tp_mask_lm = 0, 0
                torch.cuda.empty_cache()
            if steps % (args.accum_steps * args.eval_interval) == 0:
                patience -= 1
                print('model evaluating starts...', flush=True)
                dev_acc = eval_model(model, dev_data,
                                     args.DATASET_MAX_SNT_LENGTH,
                                     args.BATCH_MAX_SNT_LENGTH, 'dev')
                if best_dev < dev_acc:
                    best_dev = dev_acc
                    patience = args.patience
                    model.save_models(
                        os.path.join(args.save_path, 'best.model/'))
                print('best performance: ACC: %.03f' % (best_dev))
                print('model evaluating ends...', flush=True)
                if args.early_stop and patience < 0:
                    break
            if steps % (args.accum_steps * args.save_interval) == 0:
                model.save_models(
                    os.path.join(
                        args.save_path,
                        str(steps // args.accum_steps) + '.steps.model/'))
            steps += 1

        if args.early_stop and patience < 0:
            print('early stop')
            break

    # ====== postprocess ====== #
    postprocess(args, start)
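
# Assumed entry point, as in the previous examples.
if __name__ == '__main__':
    main()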