Example #1
def main():

    vocab = data.Vocabulary()
    data.build_vocab(vocab, config.vector_file)  # build vocabulary

    # classifier = models.Attentionclassifier(vocab_size=vocab.n_words,
    #                                         emb_dim=config.DIM,
    #                                         hidden_size=config.HIDDEN_SIZE,
    #                                         num_layer=config.NUM_LAYER,
    #                                         dropout=config.drop_out,
    #                                         bidirectional=config.bidirectional,
    #                                         label_size=config.label_class,
    #                                         use_pretrain=True,
    #                                         embed_matrix=vocab.vector,
    #                                         embed_freeze=False).to(config.device)

    classifier = models.FinetuneModel1(vocab_size=vocab.n_words,
                                       emb_dim=config.DIM,
                                       hidden_size=config.HIDDEN_SIZE,
                                       num_layer=config.NUM_LAYER,
                                       dropout=config.drop_out,
                                       bidirectional=config.bidirectional,
                                       label_size=config.label_class,
                                       hidden_size1=128,
                                       use_pretrain=True,
                                       embed_matrix=vocab.vector,
                                       embed_freeze=False).to(config.device)

    model_dict = classifier.state_dict()

    pretrained_model = torch.load(config.model_path)

    pretrained_dict = dict()

    for k, v in pretrained_model.items():
        if k == 'state_dict':
            for kk, vv in v.items():
                if kk in model_dict:
                    pretrained_dict[kk] = vv

    # update the existing model_dict with the matched pretrained weights
    model_dict.update(pretrained_dict)

    # load the merged state dict into the model
    classifier.load_state_dict(model_dict)
    # classifier.eval()
    test_data = data.Sentiment(config.predict_file, vocab)
    test_dataloader = DataLoader(test_data,
                                 batch_size=config.TRAIN_BATCH_SIZE,
                                 shuffle=True,
                                 collate_fn=data.collate_fn)
    predict(classifier, test_dataloader, config.silent)
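
This listing (and Example #3 below) relies on a partial checkpoint-loading idiom: keep only the checkpoint entries whose keys exist in the new model, merge them into the model's own state_dict, and load the merged dict. The following is a minimal, self-contained sketch of that idiom with hypothetical stand-in models in place of the project's classifier and the checkpoint at config.model_path (the shape check is an extra guard not present in the listing):

import torch.nn as nn

pretrained = nn.Sequential(nn.Linear(8, 16), nn.Linear(16, 4))   # stand-in for the saved model
classifier = nn.Sequential(nn.Linear(8, 16), nn.Linear(16, 2))   # new model with a different head

model_dict = classifier.state_dict()
# keep only entries whose key and shape match the new model
pretrained_dict = {k: v for k, v in pretrained.state_dict().items()
                   if k in model_dict and v.shape == model_dict[k].shape}
model_dict.update(pretrained_dict)       # overwrite the matching weights
classifier.load_state_dict(model_dict)   # unmatched layers keep their fresh initialization
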
Example #2
def train():
    args = parse_args()
    if args.random_seed == 0:
        args.random_seed = None
        print("random seed is None")
    if args.enable_ce:
        random.seed(args.random_seed)
        np.random.seed(args.random_seed)
    logger = logging.getLogger("lm")
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    logger.info('Running with args : {}'.format(args))
    logger.info('Running paddle : {}'.format(paddle.version.commit))

    hidden_size = args.hidden_size
    batch_size = args.batch_size
    data_path = args.data_path
    logger.info("begin to load vocab")
    vocab = data.Vocabulary(args.vocab_path, validate_file=True)
    vocab_size = vocab.size
    logger.info("finished load vocab")

    logger.info('build the model...')
    # build model
    train_prog = fluid.Program()
    train_startup_prog = fluid.Program()
    if args.enable_ce:
        train_prog.random_seed = args.random_seed
        train_startup_prog.random_seed = args.random_seed

    # build infer model
    infer_prog = fluid.Program()
    infer_startup_prog = fluid.Program()
    with fluid.program_guard(infer_prog, infer_startup_prog):
        with fluid.unique_name.guard():
            # Infer process
            infer_model = lm_model.LanguageModel(args,
                                                 vocab_size,
                                                 test_mode=True)
            infer_model.build()
    infer_progs = infer_prog, infer_startup_prog, infer_model

    with fluid.program_guard(train_prog, train_startup_prog):
        with fluid.unique_name.guard():
            # Training process
            train_model = lm_model.LanguageModel(args,
                                                 vocab_size,
                                                 test_mode=False)
            train_model.build()
            fluid.clip.set_gradient_clip(
                clip=fluid.clip.GradientClipByGlobalNorm(
                    clip_norm=args.max_grad_norm))

            # build optimizer
            if args.optim == 'adagrad':
                optimizer = fluid.optimizer.Adagrad(
                    learning_rate=args.learning_rate,
                    epsilon=0.0,
                    initial_accumulator_value=1.0)
            elif args.optim == 'sgd':
                optimizer = fluid.optimizer.SGD(
                    learning_rate=args.learning_rate)
            elif args.optim == 'adam':
                optimizer = fluid.optimizer.Adam(
                    learning_rate=args.learning_rate)
            elif args.optim == 'rprop':
                optimizer = fluid.optimizer.RMSPropOptimizer(
                    learning_rate=args.learning_rate)
            else:
                logger.error('Unsupported optimizer: {}'.format(args.optim))
                exit(-1)
            optimizer.minimize(train_model.loss * args.num_steps)

            # initialize parameters
            place = core.CUDAPlace(0) if args.use_gpu else core.CPUPlace()
            exe = Executor(place)
    train_progs = train_prog, train_startup_prog, train_model

    if args.local:
        logger.info("local start_up:")
        train_loop(args, logger, vocab, train_progs, infer_progs, optimizer)
    else:
        if args.update_method == "nccl2":
            trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
            if args.test_nccl:
                worker_endpoints_env = os.getenv("PADDLE_WORK_ENDPOINTS")
                worker_endpoints = worker_endpoints_env.split(',')
                trainers_num = len(worker_endpoints)
                current_endpoint = worker_endpoints[trainer_id]
            else:
                port = os.getenv("PADDLE_PORT")
                worker_ips = os.getenv("PADDLE_TRAINERS")
                worker_endpoints = []
                for ip in worker_ips.split(","):
                    worker_endpoints.append(':'.join([ip, port]))
                worker_endpoints_env = ','.join(worker_endpoints)
                trainers_num = len(worker_endpoints)
                current_endpoint = os.getenv("POD_IP") + ":" + port
            if trainer_id == 0:
                logger.info("trainer_id == 0, sleep 60s")
                time.sleep(60)

            logger.info("trainers_num:{}".format(trainers_num))
            logger.info("worker_endpoints:{}".format(worker_endpoints))
            logger.info("current_endpoint:{}".format(current_endpoint))
            config = fluid.DistributeTranspilerConfig()
            config.mode = "nccl2"
            t = fluid.DistributeTranspiler(config=config)
            t.transpile(trainer_id,
                        trainers=worker_endpoints_env,
                        current_endpoint=current_endpoint,
                        program=train_prog,
                        startup_program=train_startup_prog)
            train_progs = train_prog, train_startup_prog, train_model
            train_loop(args, logger, vocab, train_progs, infer_progs,
                       optimizer, trainers_num, trainer_id, worker_endpoints)
        else:
            port = os.getenv("PADDLE_PORT", "6174")
            pserver_ips = os.getenv("PADDLE_PSERVERS")
            eplist = []
            for ip in pserver_ips.split(","):
                eplist.append(':'.join([ip, port]))
            pserver_endpoints = ",".join(eplist)
            trainers = int(os.getenv("PADDLE_TRAINERS_NUM", "0"))
            current_endpoint = os.getenv("POD_IP") + ":" + port
            trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))

            logger.info("pserver_endpoints:{}".format(pserver_endpoints))
            logger.info("current_endpoint:{}".format(current_endpoint))
            logger.info("trainer_id:{}".format(trainer_id))
            logger.info("pserver_ips:{}".format(pserver_ips))
            logger.info("port:{}".format(port))

            t = fluid.DistributeTranspiler()
            t.transpile(trainer_id,
                        pservers=pserver_endpoints,
                        trainers=trainers,
                        program=train_prog,
                        startup_program=train_startup_prog)

            # role of this process; the env var name is assumed to follow the
            # launch scripts that set PADDLE_PSERVERS and PADDLE_TRAINER_ID above
            training_role = os.getenv("TRAINING_ROLE", "TRAINER")
            if training_role == "PSERVER":
                logger.info("distributed: pserver started")
                current_endpoint = os.getenv("POD_IP") + ":" + os.getenv(
                    "PADDLE_PORT")
                if not current_endpoint:
                    logger.critical("need env SERVER_ENDPOINT")
                    exit(1)
                pserver_prog = t.get_pserver_program(current_endpoint)
                pserver_startup = t.get_startup_program(
                    current_endpoint, pserver_prog)

                exe.run(pserver_startup)
                exe.run(pserver_prog)
            elif training_role == "TRAINER":
                logger.info("distributed: trainer started")
                trainer_prog = t.get_trainer_program()
                train_loop(args, logger, vocab, train_progs, infer_progs,
                           optimizer)
            else:
                logger.critical(
                    "environment var TRAINING_ROLE should be TRAINER or PSERVER"
                )
                exit(1)
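
Stripped of the Paddle-specific transpiler calls, the endpoint bookkeeping in both the nccl2 and pserver branches boils down to assembling "ip:port" strings from environment variables. A standalone sketch of that step (the default values are only for illustration):

import os

port = os.getenv("PADDLE_PORT", "6174")
worker_ips = os.getenv("PADDLE_TRAINERS", "127.0.0.1,127.0.0.2")
trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))

worker_endpoints = ["{}:{}".format(ip, port) for ip in worker_ips.split(",")]
trainers_num = len(worker_endpoints)
current_endpoint = worker_endpoints[trainer_id]

print(trainers_num, current_endpoint)   # e.g. 2 127.0.0.1:6174
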
Example #3
def finetune():
    vocab = data.Vocabulary()
    data.build_vocab(vocab, config.vector_file)  # build vocabulary

    train_data = data.Sentiment(config.finetune_train_file, vocab)

    train_dataloader = DataLoader(train_data,
                                  batch_size=config.TRAIN_BATCH_SIZE,
                                  shuffle=True,
                                  collate_fn=data.collate_fn)

    valid_data = data.Sentiment(config.finetune_valid_file, vocab)

    valid_dataloader = DataLoader(valid_data,
                                  batch_size=config.TRAIN_BATCH_SIZE,
                                  shuffle=True,
                                  collate_fn=data.collate_fn)

    test_data = data.Sentiment(config.finetune_test_file, vocab)

    test_dataloader = DataLoader(test_data,
                                 batch_size=config.TRAIN_BATCH_SIZE,
                                 shuffle=True,
                                 collate_fn=data.collate_fn)

    classifier = models.FinetuneModel1(vocab_size=vocab.n_words,
                                       emb_dim=config.DIM,
                                       hidden_size=config.HIDDEN_SIZE,
                                       num_layer=config.NUM_LAYER,
                                       dropout=config.drop_out,
                                       bidirectional=config.bidirectional,
                                       label_size=config.label_class,
                                       hidden_size1=128,
                                       use_pretrain=True,
                                       embed_matrix=vocab.vector,
                                       embed_freeze=False).to(config.device)

    model_dict = classifier.state_dict()

    pretrained_model = torch.load(config.model_path)

    # drop keys from the pretrained state dict that do not exist in model_dict

    pretrained_dict = dict()

    for k, v in pretrained_model.items():
        if k == 'state_dict':
            for kk, vv in v.items():
                if kk in model_dict:
                    pretrained_dict[kk] = vv

    # update the existing model_dict
    model_dict.update(pretrained_dict)

    # load the merged state dict we actually need
    classifier.load_state_dict(model_dict)

    # freeze all network parameters so they are not updated
    for param in classifier.parameters():
        param.requires_grad = False

    # allow only the final layer's parameters to be updated
    for param in classifier.final.parameters():
        param.requires_grad = True

    # new_model = models.FinetuneModel(classifier, hidden_size1=128, class_size=2)
    # print(new_model)

    criterion = nn.NLLLoss()
    # optimizer = torch.optim.Adam(classifier.parameters())
    # optimizer = torch.optim.RMSprop(classifier.parameters(), lr=0.001, alpha=0.9, momentum=0.2)
    optimizer = torch.optim.Adadelta(filter(lambda p: p.requires_grad,
                                            classifier.parameters()),
                                     lr=0.01,
                                     rho=0.9,
                                     eps=1e-06,
                                     weight_decay=0)
    # optimizer = torch.optim.RMSprop(classifier.parameters())

    best_f1 = 0

    for epoch in range(config.finetune_epochs):

        # lr update
        # adjust_learning_rate(optimizer, epoch)
        # check whether this optimizer's learning rate adapts over epochs
        for param_group in optimizer.param_groups:
            print("here lr :{}".format(param_group['lr']))

        logging.info("epoch {0:04d}".format(epoch))
        main.train(train_dataloader, classifier, criterion, optimizer, epoch,
                   config.finetune_batch_size, config.silent)
        test_f1, val_loss = main.test(valid_dataloader, classifier, criterion,
                                      epoch, config.finetune_batch_size,
                                      config.silent)

        is_best = test_f1 > best_f1  # True or False
        best_f1 = max(test_f1, best_f1)

        logging.info("best f1 is {}".format(best_f1))
        main.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': classifier.state_dict(),
                'acc': test_f1,
                'best_acc': best_f1,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            checkpoint='../output/',
            save_file='finetune_model_best.pth.tar')

    predict.predict(classifier, test_dataloader, config.silent)
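
The core of the fine-tuning recipe above is: freeze every parameter, re-enable gradients only for the final layer, and hand the optimizer just the trainable parameters. A minimal sketch of that pattern, with a hypothetical two-layer model standing in for FinetuneModel1:

import torch
import torch.nn as nn

model = nn.Sequential()
model.add_module('encoder', nn.Linear(50, 64))
model.add_module('final', nn.Linear(64, 2))

for param in model.parameters():          # freeze everything
    param.requires_grad = False
for param in model.final.parameters():    # ...except the classification head
    param.requires_grad = True

optimizer = torch.optim.Adadelta(
    filter(lambda p: p.requires_grad, model.parameters()), lr=0.01)

trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("trainable parameters:", trainable)  # only the final Linear's weight and bias
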
Example #4
import torch
import torch.optim as optim
import pandas as pd
from torch.utils.data import DataLoader
import numpy as np
from torch.autograd import Variable
import torch.nn.functional as F

# project-local modules used below (their definitions are not shown in this excerpt)
import config
import data

if __name__ == "__main__":

    print("starting...")
    # prepare data
    csv_dataset = pd.read_csv(config.file_name,
                              header=None)  # csv_file format: dataframe
    print("data loaded")
    vocab = data.Vocabulary()
    data.build_vocab(vocab)  # build vocabulary

    print("build vocab success")
    train_data = data.sentimentDataset(vocab,
                                       csv_dataset,
                                       train_size=config.TRAIN_RATIO,
                                       test_size=config.TEST_RATIO,
                                       train=True)
    test_data = data.sentimentDataset(vocab, csv_dataset, train=False)

    train_dataloader = DataLoader(train_data,
                                  batch_size=config.TRAIN_BATCH_SIZE,
                                  shuffle=True,
                                  collate_fn=data.collate_fn)
    test_dataloader = DataLoader(test_data,
                                 batch_size=config.TRAIN_BATCH_SIZE,
                                 shuffle=True,
                                 collate_fn=data.collate_fn)
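
data.collate_fn, used by every DataLoader in these examples, is not shown in the listings. A typical implementation for variable-length token sequences pads each batch to a common length; the sketch below is an assumption about its behaviour, not the project's actual code:

import torch
from torch.nn.utils.rnn import pad_sequence

def collate_fn(batch):
    # batch: list of (token_id_tensor, label) pairs produced by the Dataset
    sequences, labels = zip(*batch)
    lengths = torch.tensor([len(s) for s in sequences])
    padded = pad_sequence(sequences, batch_first=True, padding_value=0)
    return padded, lengths, torch.tensor(labels)
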
Example #5
def main():

    best_f1 = 0
    print(config.device)

    vocab = data.Vocabulary()
    data.build_vocab(vocab, config.vector_file)  # build vocabulary

    train_data = data.Sentiment(config.train_file, vocab)

    train_dataloader = DataLoader(train_data,
                                  batch_size=config.TRAIN_BATCH_SIZE,
                                  shuffle=True,
                                  collate_fn=data.collate_fn)

    test_data = data.Sentiment(config.test_file, vocab)

    test_dataloader = DataLoader(test_data,
                                 batch_size=config.TRAIN_BATCH_SIZE,
                                 shuffle=True,
                                 collate_fn=data.collate_fn)

    # classifier = models.RNNClassifier(nembedding=config.DIM,
    #                                   hidden_size=config.HIDDEN_SIZE,
    #                                   num_layer=config.NUM_LAYER,
    #                                   dropout=config.drop_out,
    #                                   vocab_size=vocab.n_words,
    #                                   use_pretrain=True,
    #                                   embed_matrix=vocab.vector,
    #                                   embed_freeze=False,
    #                                   label_size=config.label_class).to(config.device)

    classifier = models.Attentionclassifier(vocab_size=vocab.n_words,
                                            emb_dim=config.DIM,
                                            hidden_size=config.HIDDEN_SIZE,
                                            num_layer=config.NUM_LAYER,
                                            dropout=config.drop_out,
                                            bidirectional=config.bidirectional,
                                            label_size=config.label_class,
                                            use_pretrain=True,
                                            embed_matrix=vocab.vector,
                                            embed_freeze=False).to(
                                                config.device)

    criterion = nn.NLLLoss()
    # optimizer = torch.optim.Adam(classifier.parameters())
    optimizer = torch.optim.RMSprop(classifier.parameters(),
                                    lr=config.LR,
                                    alpha=0.9,
                                    momentum=0.2)
    # optimizer = torch.optim.RMSprop(classifier.parameters())

    # optimizer, scheduler = adam_optimizers(classifier.parameters())

    # optimizer = torch.optim.Adadelta(classifier.parameters(), lr=config.LR, rho=0.9, eps=1e-06, weight_decay=0)

    for epoch in range(config.epochs):

        # lr update
        adjust_learning_rate(optimizer, epoch)
        # check whether this optimizer's learning rate adapts over epochs
        # for param_group in optimizer.param_groups:
        #     print("here lr :{}".format(param_group['lr']))

        logging.info("epoch {0:04d}".format(epoch))
        train(train_dataloader, classifier, criterion, optimizer, epoch,
              config.TRAIN_BATCH_SIZE, config.silent)
        test_f1, val_loss = test(test_dataloader, classifier, criterion, epoch,
                                 config.TRAIN_BATCH_SIZE, config.silent)

        # scheduler.step(val_loss)

        is_best = test_f1 > best_f1  # True or False
        best_f1 = max(test_f1, best_f1)

        logging.info("best f1 is {}".format(best_f1))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': classifier.state_dict(),
                'acc': test_f1,
                'best_acc': best_f1,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            checkpoint='../output/')
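
save_checkpoint() itself is not part of these listings. The pattern used here (save the latest state every epoch, and copy it aside when the metric improves) is commonly implemented roughly as follows; the file names and checkpoint directory are illustrative:

import os
import shutil
import torch

def save_checkpoint(state, is_best, checkpoint='../output/',
                    save_file='model_best.pth.tar'):
    os.makedirs(checkpoint, exist_ok=True)
    last_path = os.path.join(checkpoint, 'checkpoint.pth.tar')
    torch.save(state, last_path)                 # always keep the latest epoch
    if is_best:                                  # keep a separate copy of the best one
        shutil.copyfile(last_path, os.path.join(checkpoint, save_file))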