def init_parameters(self, pretrain_path=None, device=None):
    if pretrain_path is None:
        return

    INFO("Loading pretrained parameters for LM from {}".format(pretrain_path))
    need_pretrain_params_prefix = [
        'decoder.embeddings', 'decoder.cell', 'decoder.linear_hidden', 'generator'
    ]
    pretrain_params = torch.load(pretrain_path, map_location=device)
    for name, params in pretrain_params.items():
        for pp in need_pretrain_params_prefix:
            if name.startswith(pp):
                INFO("Loading param: {}...".format(name))
                try:
                    self.load_state_dict({name: params}, strict=False)
                except Exception as e:
                    WARN("{}: {}".format(type(e).__name__, e))
    INFO("Pretrained model loaded.")

    self.decoder.embeddings.embeddings.weight.requires_grad = False
    self.generator.proj.weight.requires_grad = False
def load_pretrain_model(self, pretrain_path=None, device=None):
    if pretrain_path is None:
        return

    INFO("Loading pretrained parameters for LM from {}".format(pretrain_path))
    need_pretrain_params_prefix = [
        'encoder', 'decoder.embeddings', 'generator',
    ]
    pretrained_params_name = []
    pretrain_params = torch.load(pretrain_path, map_location=device)
    for name, param in pretrain_params.items():
        for pp in need_pretrain_params_prefix:
            if name.startswith(pp):
                INFO("Loading param: {}...".format(name))
                try:
                    self.load_state_dict({name: param}, strict=False)
                except Exception as e:
                    WARN("{}: {}".format(type(e).__name__, e))
                pretrained_params_name.append(name)

    # Freeze pretrained parameters
    # for name, param in self.named_parameters():
    #     if name in pretrained_params_name:
    #         param.requires_grad = False

    # INFO("Pretrained model loaded.")
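# A minimal, self-contained sketch of the prefix-filtered loading used above.
# `TinyModel`, `_demo_partial_load`, and the checkpoint path are hypothetical
# stand-ins, not part of this codebase; only the load_state_dict(..., strict=False)
# pattern comes from the functions above. Parameters are copied one tensor at a
# time, so names outside the wanted prefixes simply keep their random init.
import torch
import torch.nn as nn

class TinyModel(nn.Module):
    def __init__(self):
        super(TinyModel, self).__init__()
        self.encoder = nn.Linear(4, 4)
        self.generator = nn.Linear(4, 4)

def _demo_partial_load(tmp_path="/tmp/toy_pretrain.pth"):
    # save a "pretrained" checkpoint, then re-load only the wanted prefixes
    torch.save(TinyModel().state_dict(), tmp_path)
    model = TinyModel()
    wanted_prefixes = ['encoder', 'generator']
    pretrain_params = torch.load(tmp_path, map_location="cpu")
    for name, param in pretrain_params.items():
        if any(name.startswith(p) for p in wanted_prefixes):
            # strict=False lets us load a single tensor at a time,
            # leaving every other parameter untouched
            model.load_state_dict({name: param}, strict=False)
    return model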
def shuffle(*path):
    f_handles = [open(p) for p in path]

    # Read all the data
    lines = []
    for l in f_handles[0]:
        line = [l.strip()] + [ff.readline().strip() for ff in f_handles[1:]]
        lines.append(line)

    # close file handles
    [f.close() for f in f_handles]

    # random shuffle the data
    INFO('Shuffling data...')
    random.shuffle(lines)
    INFO('Done.')

    # Set up temp files
    f_handles = []
    for p in path:
        _, filename = os.path.split(p)
        f_handles.append(tempfile.TemporaryFile(prefix=filename + '.shuf', dir="/tmp/", mode="a+"))

    for line in lines:
        for ii, f in enumerate(f_handles):
            print(line[ii], file=f)

    # release memory
    lines = []

    # Reset file handles
    [f.seek(0) for f in f_handles]

    return tuple(f_handles)
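# A runnable usage sketch for shuffle(). The toy file paths and contents below
# are made up for illustration, not from the codebase: parallel files are
# shuffled with one shared permutation, and the returned temp-file handles come
# back already reset to offset 0, so they can be iterated directly.
def _demo_shuffle():
    with open("/tmp/toy.src", "w") as fs, open("/tmp/toy.tgt", "w") as ft:
        for i in range(5):
            print("src sentence {}".format(i), file=fs)
            print("tgt sentence {}".format(i), file=ft)

    src_shuf, tgt_shuf = shuffle("/tmp/toy.src", "/tmp/toy.tgt")
    for src_line, tgt_line in zip(src_shuf, tgt_shuf):
        # pairs stay aligned: line i of the shuffled source still faces line i of the target
        print(src_line.strip(), "|||", tgt_line.strip())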
def test_all():
    test_dir = "./tmp"

    if not os.path.exists(test_dir):
        os.makedirs(test_dir, exist_ok=True)

    # INFO("=" * 20)
    # INFO("Test transformer training...")
    # test_transformer_train(test_dir)
    # INFO("Done.")
    # INFO("=" * 20)
    #
    # INFO("=" * 20)
    # INFO("Test transformer inference...")
    # test_transformer_inference(test_dir)
    # INFO("Done.")
    # INFO("=" * 20)

    clean_tmp_dir(test_dir)

    INFO("=" * 20)
    INFO("Test DL4MT training...")
    test_dl4mt_train(test_dir)
    INFO("Done.")
    INFO("=" * 20)

    INFO("=" * 20)
    INFO("Test DL4MT inference...")
    test_dl4mt_inference(test_dir)
    INFO("Done.")
    INFO("=" * 20)

    rm_tmp_dir(test_dir)
def build_model(model: str, **kwargs):
    if model not in MODEL_CLS:
        raise ValueError(
            "Invalid model class '{}' provided. Only {} are supported now.".format(
                model, list(MODEL_CLS.keys())))

    INFO("model_name: " + model)

    return MODEL_CLS[model](**kwargs)
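# A usage sketch for build_model(). The model name and keyword arguments below
# are illustrative (the real keys live in MODEL_CLS, and n_src_vocab/n_tgt_vocab
# mirror the call in get_teacher_model further down): the factory looks the class
# up by name and forwards every keyword argument to its constructor.
#
# nmt_model = build_model(model="Transformer",
#                         n_src_vocab=vocab_src.max_n_words,
#                         n_tgt_vocab=vocab_tgt.max_n_words,
#                         padding_idx=vocab_src.pad)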
def __init__(self, d_model, d_inner_hid, n_head, dim_per_head, dropout=0.1,
             dim_capsule=100, num_capsules=0, null_capsule=False):
    super(DecoderBlock, self).__init__()

    self.slf_attn = MultiHeadedAttention(head_count=n_head, model_dim=d_model, dropout=dropout,
                                         dim_per_head=dim_per_head)
    # self.ctx_attn = MultiHeadedAttention(head_count=n_head, model_dim=d_model, dropout=dropout,
    #                                      dim_per_head=dim_per_head)
    self.pos_ffn = PositionwiseFeedForward(size=d_model, hidden_size=d_inner_hid)

    self.layer_norm_1 = nn.LayerNorm(d_model)
    self.layer_norm_2 = nn.LayerNorm(d_model)

    self.dropout = nn.Dropout(dropout)

    # contextual capsule layer
    self.apply_capsule = True
    # self.pre_capsule_layer_norm = nn.LayerNorm(d_model)

    assert dim_capsule % num_capsules == 0
    self.dim_per_cap = dim_capsule // num_capsules
    dim_per_part = dim_capsule // 3

    total_num_capsules = num_capsules
    self.null_caps = null_capsule
    if null_capsule:
        INFO("Using Null Capsules to attract irrelevant routing.")
        total_num_capsules += num_capsules // 3

    self.capsule_layer = ContextualCapsuleLayer(
        num_out_caps=total_num_capsules, num_in_caps=None,
        dim_in_caps=d_model, dim_out_caps=self.dim_per_cap,
        dim_context=d_model, num_iterations=3,
        share_route_weights_for_in_caps=True)

    self.out_and_cap_ffn = MultiInputPositionwiseFeedForward(
        size=d_model, hidden_size=d_inner_hid, dropout=dropout,
        inp_sizes=[dim_per_part, dim_per_part, dim_per_part])
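# A small sketch of the capsule dimension bookkeeping above (the numbers are
# illustrative, not from a config): dim_capsule is split evenly across the
# num_capsules output capsules, while dim_per_part carves the same dim_capsule
# into the three equally sized inputs passed to out_and_cap_ffn as inp_sizes.
dim_capsule, num_capsules = 96, 6          # hypothetical settings
dim_per_cap = dim_capsule // num_capsules  # 16 dims carried by each capsule
dim_per_part = dim_capsule // 3            # 32 dims per part -> inp_sizes=[32, 32, 32]
assert dim_per_cap * num_capsules == dim_capsule
assert dim_per_part * 3 == dim_capsule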
def load_external_lm(self, pretrain_path=None, device=None):
    if pretrain_path is None:
        return

    INFO("Loading pretrained parameters for LM from {}".format(pretrain_path))
    need_pretrain_params_prefix = [
        'decoder.cell', 'decoder.linear_hidden',
    ]
    pretrain_params = torch.load(pretrain_path, map_location=device)
    for name, params in pretrain_params.items():
        for pp in need_pretrain_params_prefix:
            if name.startswith(pp):
                INFO("Loading param: {}...".format(name))
                try:
                    self.load_state_dict({name: params}, strict=False)
                except Exception as e:
                    WARN("{}: {}".format(type(e).__name__, e))
    INFO("Pretrained model loaded.")
def get_teacher_model(training_configs, model_configs, vocab_src, vocab_tgt, flags):
    # build teacher model
    if training_configs['use_odc']:
        INFO('Building teacher model...')
        teacher_model = build_model(n_src_vocab=vocab_src.max_n_words,
                                    n_tgt_vocab=vocab_tgt.max_n_words,
                                    padding_idx=vocab_src.pad,
                                    vocab_src=vocab_src,
                                    **model_configs)
        if Constants.USE_GPU:
            teacher_model.cuda()

        if training_configs.get('teacher_model_path', '') != '':
            teacher_model_path = training_configs['teacher_model_path']
            teacher_model.load_state_dict(
                torch.load(teacher_model_path, map_location=Constants.CURRENT_DEVICE),
                strict=False)
        else:
            teacher_model_path = os.path.join(flags.saveto, flags.model_name + '.teacher.pth')
        INFO('Done.')
    else:
        teacher_model = None
        teacher_model_path = ''

    return teacher_model, teacher_model_path
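# A usage sketch for get_teacher_model(). The `configs` dictionary keys and the
# `flags` object are hypothetical placeholders following the signature above:
# when 'use_odc' is enabled, the returned teacher is either warm-started from
# 'teacher_model_path' or later saved to "<saveto>/<model_name>.teacher.pth".
#
# teacher_model, teacher_model_path = get_teacher_model(
#     training_configs=configs['training_configs'],
#     model_configs=configs['model_configs'],
#     vocab_src=vocab_src, vocab_tgt=vocab_tgt, flags=flags)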
        beam_size=beam_size,
        model_path=model_path,
        use_gpu=False,
        config_path=config_path,
        saveto=saveto,
        max_steps=20)


if __name__ == '__main__':
    test_dir = "./tmp"

    if not os.path.exists(test_dir):
        os.makedirs(test_dir, exist_ok=True)

    INFO("=" * 20)
    INFO("Test transformer training...")
    test_transformer_train(test_dir)
    INFO("Done.")
    INFO("=" * 20)

    INFO("=" * 20)
    INFO("Test transformer inference...")
    test_transformer_inference(test_dir)
    INFO("Done.")
    INFO("=" * 20)

    INFO("=" * 20)
    INFO("Test ensemble inference...")
    test_transformer_ensemble_inference(test_dir)
    INFO("Done.")
        config_path=config_path,
        saveto=saveto,
        max_steps=20,
        alpha=alpha)


if __name__ == '__main__':
    test_dir = "./tmp"

    parser = test_utils.build_test_argparser()
    args = parser.parse_args()

    if not os.path.exists(test_dir):
        os.makedirs(test_dir, exist_ok=True)

    INFO("=" * 20)
    INFO("Test transformer training...")
    test_transformer_train(test_dir, use_gpu=args.use_gpu)
    INFO("Done.")
    INFO("=" * 20)

    INFO("=" * 20)
    INFO("Test resuming from training...")
    test_transformer_train(test_dir, use_gpu=args.use_gpu)
    INFO("Done.")
    INFO("=" * 20)

    INFO("=" * 20)
    INFO("Test transformer inference...")
    test_transformer_inference(test_dir, use_gpu=args.use_gpu)
    INFO("Done.")
        model_path=model_path,
        use_gpu=use_gpu,
        config_path=config_path,
        saveto=saveto,
        max_steps=20)


if __name__ == '__main__':
    parser = test_utils.build_test_argparser()
    args = parser.parse_args()

    test_dir = "./tmp"

    if not os.path.exists(test_dir):
        os.makedirs(test_dir, exist_ok=True)

    INFO("=" * 20)
    INFO("Test training with BPE...")
    test_transformer_train(test_dir, use_gpu=args.use_gpu)
    INFO("Done.")
    INFO("=" * 20)

    INFO("=" * 20)
    INFO("Test inference with BPE...")
    test_transformer_inference(test_dir, use_gpu=args.use_gpu)
    INFO("Done.")
    INFO("=" * 20)

    test_utils.rm_tmp_dir(test_dir)
        model_path=model_path,
        use_gpu=use_gpu,
        config_path=config_path,
        saveto=saveto,
        max_steps=20)


if __name__ == '__main__':
    parser = test_utils.build_test_argparser()
    args = parser.parse_args()

    test_dir = "./tmp"

    if not os.path.exists(test_dir):
        os.makedirs(test_dir, exist_ok=True)

    INFO("=" * 20)
    INFO("Test DL4MT training...")
    test_dl4mt_train(test_dir, use_gpu=args.use_gpu)
    INFO("Done.")
    INFO("=" * 20)

    INFO("=" * 20)
    INFO("Test DL4MT inference...")
    test_dl4mt_inference(test_dir, use_gpu=args.use_gpu)
    INFO("Done.")
    INFO("=" * 20)

    test_utils.rm_tmp_dir(test_dir)
def INFO(self):
    if self.nmt_criterion is not None:
        INFO(self.nmt_criterion)
    if self.wordKD_criterion is not None:
        INFO(self.wordKD_criterion)
    log_path = os.path.join(test_dir, "log")
    valid_path = os.path.join(test_dir, "valid")

    train.run(model_name=model_name,
              config_path=config_path,
              saveto=saveto,
              log_path=log_path,
              valid_path=valid_path,
              debug=True)


if __name__ == '__main__':
    test_dir = "./tmp"

    if not os.path.exists(test_dir):
        os.makedirs(test_dir, exist_ok=True)

    INFO("=" * 20)
    INFO("Test transformer training...")
    test_transformer_train(test_dir)
    INFO("Done.")
    INFO("=" * 20)

    INFO("=" * 20)
    INFO("Test reloading from latest checkpoint...")
    test_transformer_train(test_dir)
    INFO("Done.")
    INFO("=" * 20)

    test_utils.rm_tmp_dir(test_dir)