def build_model(checkpoints, config, device):
    """
    Build the model, either Seq2Seq or Tensor2Tensor.

    :param checkpoints: checkpoint dict to load if a pretrained model exists
    :return: model and optimizer
    """
    print(config)

    # model
    print("building model...\n")
    model = getattr(models, config.model)(
        config,
        src_padding_idx=utils.PAD,
        tgt_padding_idx=utils.PAD,
        label_smoothing=config.label_smoothing,
    )
    model.to(device)

    # parameter initialization: uniform range and/or Xavier (Glorot)
    if config.param_init != 0.0:
        for p in model.parameters():
            p.data.uniform_(-config.param_init, config.param_init)
    if config.param_init_glorot:
        for p in model.parameters():
            if p.dim() > 1:
                xavier_uniform_(p)

    # restore model weights from an existing checkpoint
    if checkpoints is not None:
        model.load_state_dict(checkpoints["model"])

    # or load pretrained weights from a separate file
    if config.pretrain:
        print("loading checkpoint from %s" % config.pretrain)
        pre_ckpt = torch.load(
            config.pretrain, map_location=lambda storage, loc: storage)["model"]
        model.load_state_dict(pre_ckpt)

    # optimizer
    optim = models.Optim(
        config.optim,
        config.learning_rate,
        config.max_grad_norm,
        lr_decay=config.learning_rate_decay,
        start_decay_steps=config.start_decay_steps,
        beta1=config.beta1,
        beta2=config.beta2,
        decay_method=config.decay_method,
        warmup_steps=config.warmup_steps,
        model_size=config.hidden_size,
    )
    optim.set_parameters(model.parameters())
    print(optim)

    # restore optimizer state from the checkpoint and re-bind parameters
    if checkpoints is not None:
        optim.optimizer.load_state_dict(checkpoints["optim"])
        print(type(checkpoints["optim"]), optim)
        optim.set_parameters(model.parameters())

    # report model structure and total parameter count
    param_count = sum(
        [param.view(-1).size()[0] for param in model.parameters()])
    print(repr(model) + "\n\n")
    print("total number of parameters: %d\n\n" % param_count)

    return model, optim
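# Usage sketch (illustrative, not from the source): how build_model above might
# be driven from a training script. The function name `example_usage`, the
# checkpoint path argument, and the device selection are assumptions; `models`,
# `utils`, and the `config` object are the project's own modules as used above.
def example_usage(config, checkpoint_path=None):
    import torch

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Restore a checkpoint dict if a path is given; map_location keeps tensors
    # on CPU so the model can be moved to the chosen device afterwards.
    checkpoints = None
    if checkpoint_path is not None:
        checkpoints = torch.load(
            checkpoint_path, map_location=lambda storage, loc: storage)

    model, optim = build_model(checkpoints, config, device)
    return model, optim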
def build_model(checkpoints, print_log):
    """
    Purpose: build the model.

    :param checkpoints: checkpoint dict to restore from, if any
    :param print_log: logging function
    :return: model, optim, print_log
    """
    # write the config parameters to the log file
    # for k, v in config.items():
    #     print_log("%s:\t%s\n" % (str(k), str(v)))

    # -------------- build the model --------------
    print('building model...\n')
    # getattr() looks up the model class by name on the models module and
    # constructs it (e.g. a seq2seq model) with config as its init argument
    model = getattr(models, opt.model)(config)

    if checkpoints is not None:
        # restore model parameters
        model.load_state_dict(checkpoints['model'])

    if opt.pretrain:
        # load pretrained encoder weights only
        print('loading checkpoint from %s' % opt.pretrain)
        pre_ckpt = torch.load(opt.pretrain)['model']
        # keep only encoder entries and strip the leading 'encoder.' prefix
        # so the keys match model.encoder's state dict
        pre_ckpt = OrderedDict({
            key[8:]: pre_ckpt[key]
            for key in pre_ckpt if key.startswith('encoder')
        })
        print(model.encoder.state_dict().keys())
        print(pre_ckpt.keys())
        model.encoder.load_state_dict(pre_ckpt)

    if use_cuda:
        model.cuda()

    # optimizer
    if checkpoints is not None:
        optim = checkpoints['optim']
    else:
        # build a fresh Optim wrapper
        optim = models.Optim(config.optim,
                             config.learning_rate,
                             config.max_grad_norm,
                             lr_decay=config.learning_rate_decay,
                             start_decay_at=config.start_decay_at)
    # bind the model parameters to the optimizer
    optim.set_parameters(model.parameters())

    # write the log and report the model parameters
    # param_count = 0
    # for param in model.parameters():
    #     param_count += param.view(-1).size()[0]
    # for k, v in config.items():
    #     print_log("%s:\t%s\n" % (str(k), str(v)))
    # print_log("\n")
    # print_log(repr(model) + "\n\n")
    # print_log('total number of parameters: %d\n\n' % param_count)

    return model, optim, print_log
def build_model(checkpoints, print_log):
    for k, v in config.items():
        print_log("%s:\t%s\n" % (str(k), str(v)))

    # model
    print('building model...\n')
    model = getattr(models, config.model)(config,
                                           src_padding_idx=utils.PAD,
                                           tgt_padding_idx=utils.PAD)

    if config.param_init != 0.0:
        for p in model.parameters():
            p.data.uniform_(-config.param_init, config.param_init)
    if config.param_init_glorot:
        for p in model.parameters():
            if p.dim() > 1:
                xavier_uniform_(p)

    if checkpoints is not None:
        model.load_state_dict(checkpoints['model'])

    if opt.pretrain:
        print('loading checkpoint from %s' % opt.pretrain)
        pre_ckpt = torch.load(opt.pretrain)['model']
        pre_ckpt = OrderedDict({key[8:]: pre_ckpt[key]
                                for key in pre_ckpt if key.startswith('encoder')})
        print(model.encoder.state_dict().keys())
        print(pre_ckpt.keys())
        model.encoder.load_state_dict(pre_ckpt)

    if use_cuda:
        model.cuda()

    # optimizer
    if checkpoints is not None:
        optim = checkpoints['optim']
    else:
        optim = models.Optim(config.optim,
                             config.learning_rate,
                             config.max_grad_norm,
                             lr_decay=config.learning_rate_decay,
                             start_decay_steps=config.start_decay_steps,
                             beta1=config.beta1,
                             beta2=config.beta2,
                             decay_method=config.decay_method,
                             warmup_steps=config.warmup_steps,
                             model_size=config.hidden_size)
    optim.set_parameters(model.parameters())

    # print log
    param_count = 0
    for param in model.parameters():
        param_count += param.view(-1).size()[0]
    for k, v in config.items():
        print_log("%s:\t%s\n" % (str(k), str(v)))
    print_log("\n")
    print_log(repr(model) + "\n\n")
    print_log('total number of parameters: %d\n\n' % param_count)

    return model, optim, print_log
def build_model(checkpoints, print_log):
    # for k, v in config.items():
    #     print_log("%s:\t%s\n" % (str(k), str(v)))

    # model
    print('building model...\n')
    model = getattr(models, opt.model)(config)

    if checkpoints is not None:
        model.load_state_dict(checkpoints['model'])

    if opt.pretrain:
        print('loading checkpoint from %s' % opt.pretrain)
        pre_ckpt = torch.load(opt.pretrain)['model']
        # keep only encoder entries, stripping the 'encoder.' prefix
        pre_ckpt = OrderedDict({
            key[8:]: pre_ckpt[key]
            for key in pre_ckpt if key.startswith('encoder')
        })
        print(model.encoder.state_dict().keys())
        print(pre_ckpt.keys())
        model.encoder.load_state_dict(pre_ckpt)

    if use_cuda:
        model.cuda()

    # optimizer
    if checkpoints is not None:
        optim = checkpoints['optim']
    else:
        optim = models.Optim(config.optim,
                             config.learning_rate,
                             config.max_grad_norm,
                             lr_decay=config.learning_rate_decay,
                             start_decay_at=config.start_decay_at)
    optim.set_parameters(model.parameters())

    # scheduler (note: created locally and not returned)
    if config.schedule:
        scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)

    # print log
    param_count = 0
    for param in model.parameters():
        param_count += param.view(-1).size()[0]
    for k, v in config.items():
        print_log("%s:\t%s\n" % (str(k), str(v)))
    print_log("\n")
    print_log(repr(model) + "\n\n")
    print_log('total number of parameters: %d\n\n' % param_count)

    return model, optim, print_log
def build_model(checkpoints, print_log):
    with open(config.logF + config.log + '/' + 'default.yaml', "w") as f:
        yaml.dump(dict(config), f)

    # model
    print('building model...\n')
    model = getattr(models, config.model)(config)

    if checkpoints is not None:
        model.load_state_dict(checkpoints['model'])

    if opt.pretrain:
        print('loading checkpoint from %s' % opt.pretrain)
        pre_ckpt = torch.load(opt.pretrain)['model']
        pre_ckpt = OrderedDict({
            key[8:]: pre_ckpt[key]
            for key in pre_ckpt if key.startswith('encoder')
        })
        print(model.encoder.state_dict().keys())
        print(pre_ckpt.keys())
        model.encoder.load_state_dict(pre_ckpt)

    if use_cuda:
        model.cuda()

    # optimizer
    if checkpoints is not None:
        optim = checkpoints['optim']
    else:
        optim = models.Optim(config.optim,
                             config.learning_rate,
                             config.max_grad_norm,
                             lr_decay=config.learning_rate_decay,
                             start_decay_at=config.start_decay_at)
    optim.set_parameters(model.parameters())

    # print log
    param_count = 0
    for param in model.parameters():
        param_count += param.view(-1).size()[0]
    print_log("\n")
    print_log(repr(model) + "\n\n")
    print_log('total number of parameters: %d\n\n' % param_count)

    return model, optim, print_log
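# Round-trip sketch (illustrative, not from the source): the yaml.dump above
# writes the run's configuration to default.yaml; a hedged example of reading
# it back, assuming the dumped config is a plain mapping of option names to
# basic values. `load_saved_config` is a hypothetical helper name.
import yaml

def load_saved_config(path):
    with open(path) as f:
        # safe_load returns the plain dict that yaml.dump wrote out
        return yaml.safe_load(f)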
        # (inside a loop over a pretrained state dict's key/value pairs)
        # keep only the entries whose keys exist in the current model
        if key in model_dict:
            model_train_dict[key] = value
    model_dict.update(model_train_dict)
    model.load_state_dict(model_dict)

    if use_cuda:
        model = model.cuda()
        if len(config.gpus) > 1:
            model = nn.DataParallel(model, device_ids=config.gpus, dim=0)

    # optim
    if config.checkpoint_restore:
        optim = checkpoints['optim']
    else:
        # optim = models.Optim(config.optim, config.learning_rate, config.max_grad_norm,
        #                      lr_decay=config.learning_rate_decay, start_decay_at=config.start_decay_at)
        optim = models.Optim(config.optim,
                             config.learning_rate,
                             config.max_grad_norm,
                             initial_accumulator_value=config.adagrad_init_acc)
    optim.set_parameters(model.parameters())

    if config.schedule:
        scheduler = models.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)

    # total number of parameters
    param_count = 0
    for param in model.parameters():
        param_count += param.view(-1).size()[0]
    logging('model has {} parameters'.format(param_count))

    for k, v in config.items():
        logging("%s:\t%s" % (str(k), str(v)))
    logging(repr(model) + "\n")

    # model relation data
    if config.checkpoint_restore:
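# Scheduler usage sketch (illustrative, not from the source): the fragment above
# constructs a cosine-annealing scheduler but does not show it being stepped.
# A minimal sketch of the usual per-epoch pattern, assuming
# models.CosineAnnealingLR follows torch.optim.lr_scheduler.CosineAnnealingLR
# semantics and that `train_one_epoch` is a hypothetical training function.
def train_with_schedule(model, optim, scheduler, config, train_one_epoch):
    for epoch in range(config.epoch):
        train_one_epoch(model, optim, epoch)
        if config.schedule:
            # anneal the learning rate once per epoch
            scheduler.step()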
def build_model(checkpoints, config, device):
    """
    Build the model, either Seq2Seq or Tensor2Tensor
    (Tensor2Tensor is the Transformer-based variant).

    :param checkpoints: checkpoint dict to load if a pretrained model exists
    :return: model and optimizer
    """
    print(config)

    # model
    print("building model...\n")
    model = getattr(models, config.model)(
        config,
        src_padding_idx=utils.PAD,  # padding index on the source side
        tgt_padding_idx=utils.PAD,  # padding index on the target side
        label_smoothing=config.label_smoothing,  # implemented in tensor2tensor.py
    )
    model.to(device)  # move the model to the target device

    if config.param_init != 0.0:
        for p in model.parameters():
            # uniform random initialization within the configured range
            p.data.uniform_(-config.param_init, config.param_init)
    if config.param_init_glorot:
        for p in model.parameters():
            if p.dim() > 1:
                xavier_uniform_(p)

    if checkpoints is not None:
        # restore model weights from the checkpoint
        model.load_state_dict(checkpoints["model"])

    if config.pretrain:
        print("loading checkpoint from %s" % config.pretrain)
        pre_ckpt = torch.load(
            config.pretrain, map_location=lambda storage, loc: storage)["model"]
        model.load_state_dict(pre_ckpt)  # load the pretrained weights

    # optimizer configuration
    optim = models.Optim(
        config.optim,
        config.learning_rate,
        config.max_grad_norm,
        lr_decay=config.learning_rate_decay,
        start_decay_steps=config.start_decay_steps,
        beta1=config.beta1,
        beta2=config.beta2,
        decay_method=config.decay_method,
        warmup_steps=config.warmup_steps,
        model_size=config.hidden_size,
    )
    print(optim)
    optim.set_parameters(model.parameters())

    if checkpoints is not None:
        # restore the optimizer state from the checkpoint
        optim.optimizer.load_state_dict(checkpoints["optim"])

    # total number of model parameters (variables)
    param_count = sum(
        [param.view(-1).size()[0] for param in model.parameters()])
    print(repr(model) + "\n\n")  # print the model structure
    print("total number of parameters: %d\n\n" % param_count)

    return model, optim