Example #1
def build_model(checkpoints, config, device):
    """
    build model, either Seq2Seq or Tensor2Tensor
    :param checkpoints: load checkpoint if there is pretrained model
    :return: model, optimizer and the print function
    """
    print(config)

    # model
    print("building model...\n")
    model = getattr(models, config.model)(
        config,
        src_padding_idx=utils.PAD,
        tgt_padding_idx=utils.PAD,
        label_smoothing=config.label_smoothing,
    )
    model.to(device)
    if config.param_init != 0.0:
        for p in model.parameters():
            p.data.uniform_(-config.param_init, config.param_init)
    if config.param_init_glorot:
        for p in model.parameters():
            if p.dim() > 1:
                xavier_uniform_(p)
    if checkpoints is not None:
        model.load_state_dict(checkpoints["model"])
    if config.pretrain:
        print("loading checkpoint from %s" % config.pretrain)
        pre_ckpt = torch.load(
            config.pretrain,
            map_location=lambda storage, loc: storage)["model"]
        model.load_state_dict(pre_ckpt)

    optim = models.Optim(
        config.optim,
        config.learning_rate,
        config.max_grad_norm,
        lr_decay=config.learning_rate_decay,
        start_decay_steps=config.start_decay_steps,
        beta1=config.beta1,
        beta2=config.beta2,
        decay_method=config.decay_method,
        warmup_steps=config.warmup_steps,
        model_size=config.hidden_size,
    )
    optim.set_parameters(model.parameters())
    print(optim)
    if checkpoints is not None:
        optim.optimizer.load_state_dict(checkpoints["optim"])
        print(type(checkpoints['optim']), optim)
        optim.set_parameters(model.parameters())

    param_count = sum(
        [param.view(-1).size()[0] for param in model.parameters()])
    print(repr(model) + "\n\n")
    print("total number of parameters: %d\n\n" % param_count)

    return model, optim
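
All of these variants rely on a models.Optim wrapper around the stock PyTorch optimizers. Its implementation is not part of the listing; the sketch below is only a minimal stand-in, assuming an OpenNMT-style interface (a set_parameters method, an inner optimizer attribute, and gradient clipping in step), to make the calls above concrete.

import torch

class Optim:
    # Minimal stand-in for models.Optim (assumed interface, not the original code).
    def __init__(self, method, lr, max_grad_norm, **kwargs):
        self.method = method
        self.lr = lr
        self.max_grad_norm = max_grad_norm

    def set_parameters(self, params):
        # keep only trainable parameters and build the underlying optimizer
        self.params = [p for p in params if p.requires_grad]
        if self.method == "adam":
            self.optimizer = torch.optim.Adam(self.params, lr=self.lr)
        else:
            self.optimizer = torch.optim.SGD(self.params, lr=self.lr)

    def step(self):
        # clip gradients, then take an optimizer step
        if self.max_grad_norm:
            torch.nn.utils.clip_grad_norm_(self.params, self.max_grad_norm)
        self.optimizer.step()
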
Example #2
def build_model(checkpoints, print_log):
    """
    作用:创建模型
    参数:checkpoints、print_log:
    返回值:model, optim, print_log
    """
    # write the config parameters to the log file
    # for k, v in config.items():
    #     print_log("%s:\t%s\n" % (str(k), str(v)))
    # -------------- build the model ------------- #
    print('building model...\n')
    # getattr() looks up the model class by name on the models package and
    # returns a seq2seq object; config is passed as the constructor argument
    model = getattr(models, opt.model)(config)
    if checkpoints is not None:
        # load model parameters from the checkpoint
        model.load_state_dict(checkpoints['model'])
    if opt.pretrain:
        # load pretrained encoder weights
        print('loading checkpoint from %s' % opt.pretrain)
        pre_ckpt = torch.load(opt.pretrain)['model']
        pre_ckpt = OrderedDict({
            key[8:]: pre_ckpt[key]  # drop the 'encoder.' prefix (8 characters)
            for key in pre_ckpt if key.startswith('encoder')
        })
        print(model.encoder.state_dict().keys())
        print(pre_ckpt.keys())
        model.encoder.load_state_dict(pre_ckpt)
    if use_cuda:
        model.cuda()

    # optimizer
    if checkpoints is not None:
        optim = checkpoints['optim']
    else:
        # wrap the optimizer in the Optim helper
        optim = models.Optim(config.optim,
                             config.learning_rate,
                             config.max_grad_norm,
                             lr_decay=config.learning_rate_decay,
                             start_decay_at=config.start_decay_at)
    # register the model parameters with the optimizer
    optim.set_parameters(model.parameters())

    # print the log and report the model parameters
    # param_count = 0
    # for param in model.parameters():
    #     param_count += param.view(-1).size()[0]
    # for k, v in config.items():
    #     print_log("%s:\t%s\n" % (str(k), str(v)))
    # print_log("\n")
    # print_log(repr(model) + "\n\n")
    # print_log('total number of parameters: %d\n\n' % param_count)

    return model, optim, print_log
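
The opt.pretrain branch in this variant keeps only encoder weights and strips the 'encoder.' prefix (8 characters) so the remaining keys match model.encoder.state_dict(). A small self-contained sketch of the same renaming, using made-up keys:

from collections import OrderedDict

full_state = {"encoder.embedding.weight": 0, "decoder.linear.weight": 1}
encoder_state = OrderedDict(
    (key[8:], value)                      # len("encoder.") == 8
    for key, value in full_state.items()
    if key.startswith("encoder")
)
print(list(encoder_state.keys()))         # ['embedding.weight']
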
Example #3
def build_model(checkpoints, print_log):
	for k, v in config.items():
		print_log("%s:\t%s\n" % (str(k), str(v)))

	# model
	print('building model...\n')
	model = getattr(models, config.model)(config,
										  src_padding_idx=utils.PAD,
										  tgt_padding_idx=utils.PAD)
	if config.param_init != 0.0:
		for p in model.parameters():
			p.data.uniform_(-config.param_init, config.param_init)
	if config.param_init_glorot:
		for p in model.parameters():
			if p.dim() > 1:
				xavier_uniform_(p)
	if checkpoints is not None:
		model.load_state_dict(checkpoints['model'])
	if opt.pretrain:
		print('loading checkpoint from %s' % opt.pretrain)
		pre_ckpt = torch.load(opt.pretrain)['model']
		pre_ckpt = OrderedDict({key[8:]: pre_ckpt[key]
								for key in pre_ckpt if key.startswith('encoder')})
		print(model.encoder.state_dict().keys())
		print(pre_ckpt.keys())
		model.encoder.load_state_dict(pre_ckpt)
	if use_cuda:
		model.cuda()

	# optimizer
	if checkpoints is not None:
		optim = checkpoints['optim']
	else:
		optim = models.Optim(config.optim,
							 config.learning_rate, config.max_grad_norm,
							 lr_decay=config.learning_rate_decay,
							 start_decay_steps=config.start_decay_steps,
							 beta1=config.beta1, beta2=config.beta2,
							 decay_method=config.decay_method,
							 warmup_steps=config.warmup_steps,
							 model_size=config.hidden_size)
	optim.set_parameters(model.parameters())

	# print log
	param_count = 0
	for param in model.parameters():
		param_count += param.view(-1).size()[0]
	for k, v in config.items():
		print_log("%s:\t%s\n" % (str(k), str(v)))
	print_log("\n")
	print_log(repr(model) + "\n\n")
	print_log('total number of parameters: %d\n\n' % param_count)

	return model, optim, print_log
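
The parameter-count loop flattens every tensor just to read its length; torch.Tensor.numel() gives the same number directly. An equivalent, shown on a toy module rather than the real model:

import torch.nn as nn

toy = nn.Linear(4, 3)                                  # stand-in for the real model
param_count = sum(p.numel() for p in toy.parameters())
print(param_count)                                     # 4*3 weights + 3 biases = 15
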
Example #4
def build_model(checkpoints, print_log):
    # for k, v in config.items():
    #     print_log("%s:\t%s\n" % (str(k), str(v)))

    # model
    print('building model...\n')
    model = getattr(models, opt.model)(config)
    if checkpoints is not None:
        model.load_state_dict(checkpoints['model'])
    if opt.pretrain:
        print('loading checkpoint from %s' % opt.pretrain)
        pre_ckpt = torch.load(opt.pretrain)['model']
        pre_ckpt = OrderedDict({
            key[8:]: pre_ckpt[key]
            for key in pre_ckpt if key.startswith('encoder')
        })
        print(model.encoder.state_dict().keys())
        print(pre_ckpt.keys())
        model.encoder.load_state_dict(pre_ckpt)
    if use_cuda:
        model.cuda()

    # optimizer
    if checkpoints is not None:
        optim = checkpoints['optim']
    else:
        optim = models.Optim(config.optim,
                             config.learning_rate,
                             config.max_grad_norm,
                             lr_decay=config.learning_rate_decay,
                             start_decay_at=config.start_decay_at)
    optim.set_parameters(model.parameters())

    # scheduler
    if config.schedule:
        scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)

    # print log
    param_count = 0
    for param in model.parameters():
        param_count += param.view(-1).size()[0]
    for k, v in config.items():
        print_log("%s:\t%s\n" % (str(k), str(v)))
    print_log("\n")
    print_log(repr(model) + "\n\n")
    print_log('total number of parameters: %d\n\n' % param_count)

    return model, optim, print_log
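
The scheduler branch calls L.CosineAnnealingLR, where L is presumably an alias for torch.optim.lr_scheduler imported elsewhere in that repository (the import is not shown above). A minimal sketch of the same usage under that assumption:

import torch
import torch.nn as nn
from torch.optim import lr_scheduler as L   # assumed meaning of the alias "L"

optimizer = torch.optim.SGD(nn.Linear(2, 2).parameters(), lr=0.1)
scheduler = L.CosineAnnealingLR(optimizer, T_max=10)
for epoch in range(10):
    optimizer.step()
    scheduler.step()                        # anneal the learning rate once per epoch

Note that in the example above the scheduler is a local variable and is not returned, so it only takes effect if the surrounding code keeps a reference and steps it.
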
Example #5
def build_model(checkpoints, print_log):
    with open(config.logF + config.log + '/' + 'default.yaml', "w") as f:
        yaml.dump(dict(config), f)
    # model
    print('building model...\n')
    model = getattr(models, config.model)(config)
    if checkpoints is not None:
        model.load_state_dict(checkpoints['model'])
    if opt.pretrain:
        print('loading checkpoint from %s' % opt.pretrain)
        pre_ckpt = torch.load(opt.pretrain)['model']
        pre_ckpt = OrderedDict({
            key[8:]: pre_ckpt[key]
            for key in pre_ckpt if key.startswith('encoder')
        })
        print(model.encoder.state_dict().keys())
        print(pre_ckpt.keys())
        model.encoder.load_state_dict(pre_ckpt)
    if use_cuda:
        model.cuda()

    # optimizer
    if checkpoints is not None:
        optim = checkpoints['optim']
    else:
        optim = models.Optim(config.optim,
                             config.learning_rate,
                             config.max_grad_norm,
                             lr_decay=config.learning_rate_decay,
                             start_decay_at=config.start_decay_at)
    optim.set_parameters(model.parameters())

    # print log
    param_count = 0
    for param in model.parameters():
        param_count += param.view(-1).size()[0]

    print_log("\n")
    print_log(repr(model) + "\n\n")
    print_log('total number of parameters: %d\n\n' % param_count)

    return model, optim, print_log
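
Unlike the other variants, this one snapshots the run configuration to a YAML file before building the model. A short standalone sketch of the same idea, with illustrative keys and an illustrative output path:

import yaml

config = {"model": "seq2seq", "learning_rate": 0.001, "max_grad_norm": 5}
with open("default.yaml", "w") as f:        # hypothetical output path
    yaml.dump(config, f)
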
Example #6
        if key in model_dict:
            model_train_dict[key] = value
    model_dict.update(model_train_dict)
    model.load_state_dict(model_dict)
if use_cuda:
    model = model.cuda()
if len(config.gpus) > 1:
    model = nn.DataParallel(model, device_ids=config.gpus, dim=0)
# optim
if config.checkpoint_restore:
    optim = checkpoints['optim']
else:
    # optim = models.Optim(config.optim, config.learning_rate, config.max_grad_norm,
    #               lr_decay=config.learning_rate_decay, start_decay_at=config.start_decay_at)
    optim = models.Optim(config.optim,
                         config.learning_rate,
                         config.max_grad_norm,
                         initial_accumulator_value=config.adagrad_init_acc)
optim.set_parameters(model.parameters())
if config.schedule:
    scheduler = models.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)
# total number of parameters
param_count = 0
for param in model.parameters():
    param_count += param.view(-1).size()[0]
logging('model has {} parameters'.format(param_count))
for k, v in config.items():
    logging("%s:\t%s" % (str(k), str(v)))
logging(repr(model) + "\n")

def build_model(checkpoints, config, device):
    """
    build model, either Seq2Seq or Tensor2Tensor
    Tensor2Tensor就是transformer的组合
    :param checkpoints: load checkpoint if there is pretrained model
    :return: model, optimizer and the print function
    """
    print(config)

    # model
    print("building model...\n")
    model = getattr(models, config.model)(
        config,
        src_padding_idx=utils.PAD,  # padding index for the source side
        tgt_padding_idx=utils.PAD,  # padding index for the target side
        label_smoothing=config.label_smoothing,  # label smoothing coefficient,
        # handled inside tensor2tensor.py
    )
    model.to(device)  # move the model to the target device
    if config.param_init != 0.0:
        for p in model.parameters():
            p.data.uniform_(-config.param_init, config.param_init)
            # uniform random initialization within [-param_init, param_init]
    if config.param_init_glorot:
        for p in model.parameters():
            if p.dim() > 1:
                xavier_uniform_(p)
    if checkpoints is not None:
        model.load_state_dict(checkpoints["model"])
        # resume model weights from an existing checkpoint
    if config.pretrain:
        print("loading checkpoint from %s" % config.pretrain)
        pre_ckpt = torch.load(
            config.pretrain,
            map_location=lambda storage, loc: storage)["model"]
        model.load_state_dict(pre_ckpt)
        # load the pretrained weights

    optim = models.Optim(
        config.optim,
        config.learning_rate,
        config.max_grad_norm,
        lr_decay=config.learning_rate_decay,
        start_decay_steps=config.start_decay_steps,
        beta1=config.beta1,
        beta2=config.beta2,
        decay_method=config.decay_method,
        warmup_steps=config.warmup_steps,
        model_size=config.hidden_size,
    )  # choice of optimizer
    print(optim)
    optim.set_parameters(model.parameters())
    if checkpoints is not None:
        optim.optimizer.load_state_dict(checkpoints["optim"])
        # restore the optimizer state from the checkpoint

    param_count = sum(
        [param.view(-1).size()[0] for param in model.parameters()])
    # total number of model parameters
    print(repr(model) + "\n\n")  # print the model structure
    print("total number of parameters: %d\n\n" % param_count)

    return model, optim
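
Finally, a hedged sketch of how this last build_model variant might be called from a training script; the restore flag and the way config is obtained are illustrative assumptions, not code from the original repository:

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# "config" is assumed to be the parsed configuration object used throughout
# these examples (with a hypothetical "restore" flag for resuming training).
checkpoints = None
if getattr(config, "restore", None):
    checkpoints = torch.load(config.restore, map_location="cpu")

model, optim = build_model(checkpoints, config, device)
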