def finetune(args):
    """Build the data loaders, model, optimizer and solver, then fine-tune.

    Reads vocabularies from ``args.vocab_src``/``args.vocab_tgt``, attaches
    their sizes to ``args`` (``n_src``/``n_tgt``) so ``create_model`` can use
    them, and runs the solver's training loop on GPU.

    Raises:
        ValueError: if ``args.structure`` is not a supported model type.
    """
    # Vocabulary sizes are passed to model construction through args.
    token2idx_src, idx2token_src = load_vocab(args.vocab_src)
    token2idx_tgt, idx2token_tgt = load_vocab(args.vocab_tgt)
    args.n_src = len(idx2token_src)
    args.n_tgt = len(idx2token_tgt)

    tr_dataset = VQ_Pred_Dataset(args.train_src, args.train_tgt,
                                 token2idx_src, token2idx_tgt,
                                 args.batch_size, args.maxlen_in, args.maxlen_out,
                                 down_sample_rate=args.down_sample_rate)
    cv_dataset = VQ_Pred_Dataset(args.valid_src, args.valid_tgt,
                                 token2idx_src, token2idx_tgt,
                                 args.batch_size, args.maxlen_in, args.maxlen_out,
                                 down_sample_rate=args.down_sample_rate)
    # The datasets batch internally, so the DataLoader batch_size stays 1.
    tr_loader = DataLoader(tr_dataset, batch_size=1, collate_fn=f_xy_pad,
                           num_workers=args.num_workers, shuffle=args.shuffle)
    cv_loader = DataLoader(cv_dataset, batch_size=1, collate_fn=f_xy_pad,
                           num_workers=args.num_workers)
    data = {'tr_loader': tr_loader, 'cv_loader': cv_loader}

    if args.structure == 'BERT':
        from mask_lm.Mask_LM import Mask_LM as Model
        from mask_lm.solver import Mask_LM_Solver as Solver
    else:
        # FIX: previously fell through and crashed later with NameError on
        # Model/Solver; fail fast with a clear message instead.
        raise ValueError('unsupported structure: {}'.format(args.structure))

    model = Model.create_model(args)
    print(model)
    model.cuda()

    # TransformerOptimizer wraps Adam with the warmup learning-rate schedule.
    optimizer = TransformerOptimizer(
        torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09),
        args.k, args.d_model, args.warmup_steps)

    solver = Solver(data, model, optimizer, args)
    solver.train()
def train_net(args):
    """Train the Transformer, validate once per epoch, and checkpoint.

    Resumes from ``args.checkpoint`` when given; otherwise builds a fresh
    encoder/decoder pair. Tracks the best validation loss and how many
    epochs have passed without improvement.
    """
    # Fixed seeds so repeated runs are reproducible.
    torch.manual_seed(7)
    np.random.seed(7)

    checkpoint = args.checkpoint
    start_epoch = 0
    best_loss = float('inf')
    writer = SummaryWriter()
    epochs_since_improvement = 0

    if checkpoint is None:
        # Fresh start. n_src_vocab / sos_id / eos_id / n_tgt_vocab come from
        # module scope (not visible in this function).
        encoder = Encoder(n_src_vocab, args.n_layers_enc, args.n_head,
                          args.d_k, args.d_v, args.d_model, args.d_inner,
                          dropout=args.dropout, pe_maxlen=args.pe_maxlen)
        decoder = Decoder(sos_id, eos_id, n_tgt_vocab, args.d_word_vec,
                          args.n_layers_dec, args.n_head, args.d_k, args.d_v,
                          args.d_model, args.d_inner, dropout=args.dropout,
                          tgt_emb_prj_weight_sharing=args.tgt_emb_prj_weight_sharing,
                          pe_maxlen=args.pe_maxlen)
        model = Transformer(encoder, decoder)
        optimizer = TransformerOptimizer(
            torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09))
    else:
        # Resume: the checkpoint stores model and optimizer objects whole.
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']

    model = model.to(device)

    # pad_collate pads variable-length sequences within each batch.
    train_dataset = AiChallenger2017Dataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               collate_fn=pad_collate,
                                               shuffle=True,
                                               num_workers=args.num_workers)
    valid_dataset = AiChallenger2017Dataset('valid')
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=args.batch_size,
                                               collate_fn=pad_collate,
                                               shuffle=False,
                                               num_workers=args.num_workers)

    for epoch in range(start_epoch, args.epochs):
        # --- training pass ---
        train_loss = train(train_loader=train_loader, model=model,
                           optimizer=optimizer, epoch=epoch, logger=logger,
                           writer=writer)
        writer.add_scalar('epoch/train_loss', train_loss, epoch)
        writer.add_scalar('epoch/learning_rate', optimizer.lr, epoch)
        print('\nLearning rate: {}'.format(optimizer.lr))
        print('Step num: {}\n'.format(optimizer.step_num))

        # --- validation pass ---
        valid_loss = valid(valid_loader=valid_loader, model=model, logger=logger)
        writer.add_scalar('epoch/valid_loss', valid_loss, epoch)

        # Track improvement; the counter feeds into the saved checkpoint.
        is_best = valid_loss < best_loss
        best_loss = min(valid_loss, best_loss)
        if is_best:
            epochs_since_improvement = 0
        else:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement, ))

        save_checkpoint(epoch, epochs_since_improvement, model, optimizer,
                        best_loss, is_best)
def main():
    """CLI entry point: parse args, build the speech Transformer, train on nsml.

    Side effects: mutates the module-level label-map globals, loads dataset
    lists from DATASET_PATH, and reports/saves through nsml.
    """
    global char2index
    global index2char
    global SOS_token
    global EOS_token
    global PAD_token

    parser = argparse.ArgumentParser(description='Speech hackathon Baseline')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='batch size in training (default: 32)')
    parser.add_argument('--workers', type=int, default=4,
                        help='number of workers in dataset loader (default: 4)')
    parser.add_argument('--max_epochs', type=int, default=10,
                        help='number of max epochs in training (default: 10)')
    parser.add_argument('--lr', type=float, default=0.0001,
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--teacher_forcing', type=float, default=0.5,
                        help='teacher forcing ratio in decoder (default: 0.5)')
    # NOTE(review): help text says "default: 80" but the actual default is
    # WORD_MAXLEN, defined elsewhere — confirm the two agree.
    parser.add_argument('--max_len', type=int, default=WORD_MAXLEN,
                        help='maximum characters of sentence (default: 80)')
    parser.add_argument('--no_cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1,
                        help='random seed (default: 1)')
    parser.add_argument('--save_name', type=str, default='model',
                        help='the name of model in nsml or local')
    parser.add_argument('--mode', type=str, default='train')
    parser.add_argument("--pause", type=int, default=0)
    parser.add_argument('--word', action='store_true',
                        help='Train/Predict model using word based label (default: False)')
    parser.add_argument('--gen_label_index', action='store_true',
                        help='Generate word label index map(default: False)')
    # FIX: help text typo 'Iteratiom' -> 'Iteration'.
    parser.add_argument('--iteration', type=str, help='Iteration')
    parser.add_argument('--premodel_session', type=str,
                        help='Session name of premodel')
    # Transformer model parameters.
    parser.add_argument('--d_model', type=int, default=128,
                        help='transformer_d_model')
    parser.add_argument('--n_head', type=int, default=8,
                        help='transformer_n_head')
    parser.add_argument('--num_encoder_layers', type=int, default=4,
                        help='num_encoder_layers')
    parser.add_argument('--num_decoder_layers', type=int, default=4,
                        help='transformer_num_decoder_layers')
    # FIX: help text was copy-pasted from --d_model.
    parser.add_argument('--dim_feedforward', type=int, default=2048,
                        help='transformer_dim_feedforward')
    parser.add_argument('--dropout', type=float, default=0.1,
                        help='transformer_dropout')
    # Transformer warmup parameters.
    parser.add_argument('--warmup_multiplier', type=int, default=3,
                        help='transformer_warmup_multiplier')
    parser.add_argument('--warmup_epoch', type=int, default=10,
                        help='transformer_warmup_epoch')
    args = parser.parse_args()

    # Character-level labels are always loaded; word-level labels replace
    # them when --word is given.
    char_loader = CharLabelLoader()
    char_loader.load_char2index('./hackathon.labels')
    label_loader = char_loader
    if args.word:
        if args.gen_label_index:
            generate_word_label_index_file(char_loader, TRAIN_LABEL_CHAR_PATH)
            from subprocess import call
            # SECURITY NOTE: shell=True with an interpolated path; the path is
            # a project constant here, but prefer a list argv if it ever
            # becomes user-controlled.
            call(f'cat {TRAIN_LABEL_CHAR_PATH}', shell=True)
        word_loader = CharLabelLoader()
        word_loader.load_char2index('./hackathon.pos.labels')
        label_loader = word_loader
        if os.path.exists(TRAIN_LABEL_CHAR_PATH):
            generate_word_label_file(char_loader, word_loader,
                                     TRAIN_LABEL_POS_PATH,
                                     TRAIN_LABEL_CHAR_PATH)
    char2index = label_loader.char2index
    index2char = label_loader.index2char
    SOS_token = char2index['<s>']
    EOS_token = char2index['</s>']
    PAD_token = char2index['_']

    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if args.cuda else 'cpu')

    # Model: hard-coded hyperparameters (the --d_model/--n_head/... CLI args
    # are currently NOT wired into this construction).
    print("model: transformer")
    encoder = Encoder(d_input=128, n_layers=6, n_head=4, d_k=128, d_v=128,
                      d_model=128, d_inner=2048, dropout=0.1,
                      pe_maxlen=SOUND_MAXLEN)
    decoder = Decoder(sos_id=SOS_token, eos_id=EOS_token,
                      n_tgt_vocab=len(char2index), d_word_vec=128, n_layers=6,
                      n_head=4, d_k=128, d_v=128, d_model=128, d_inner=2048,
                      dropout=0.1, tgt_emb_prj_weight_sharing=True,
                      pe_maxlen=SOUND_MAXLEN)
    model = Transformer(encoder, decoder)
    optimizer = TransformerOptimizer(
        torch.optim.Adam(model.parameters(), lr=0.0004, betas=(0.9, 0.98),
                         eps=1e-09))

    # Uniform weight init, then wrap for multi-GPU.
    for param in model.parameters():
        param.data.uniform_(-0.08, 0.08)
    model = nn.DataParallel(model).to(device)

    bind_model(model, optimizer)
    if args.pause == 1:
        nsml.paused(scope=locals())
    if args.mode != "train":
        return

    # data_list.csv rows are "aaa.wav,aaa.label".
    data_list = os.path.join(DATASET_PATH, 'train_data', 'data_list.csv')
    wav_paths = list()
    script_paths = list()
    with open(data_list, 'r') as f:
        for line in f:
            wav_path, script_path = line.strip().split(',')
            wav_paths.append(os.path.join(DATASET_PATH, 'train_data', wav_path))
            script_paths.append(
                os.path.join(DATASET_PATH, 'train_data', script_path))

    best_loss = 1e10
    begin_epoch = 0

    # Load all target scripts up front to reduce disk I/O during training.
    target_path = TRAIN_LABEL_CHAR_PATH
    if args.word:
        target_path = TRAIN_LABEL_POS_PATH
    load_targets(target_path)

    train_batch_num, train_dataset_list, valid_dataset = split_dataset(
        args, wav_paths, script_paths, valid_ratio=0.05)

    # Optionally warm-start from a previous nsml session/iteration.
    if args.iteration:
        if args.premodel_session:
            nsml.load(args.iteration, session=args.premodel_session)
            logger.info(f'Load {args.premodel_session} {args.iteration}')
        else:
            nsml.load(args.iteration)
            logger.info(f'Load {args.iteration}')

    logger.info('start')
    train_begin = time.time()
    for epoch in range(begin_epoch, args.max_epochs):
        train_queue = queue.Queue(args.workers * 2)
        train_loader = MultiLoader(train_dataset_list, train_queue,
                                   args.batch_size, args.workers)
        train_loader.start()
        train_loss, train_cer = train(model, train_batch_num, train_queue,
                                      optimizer, device, train_begin,
                                      args.workers, 10, args.teacher_forcing)
        logger.info('Epoch %d (Training) Loss %0.4f CER %0.4f' %
                    (epoch, train_loss, train_cer))
        train_loader.join()
        print("~~~~~~~~~~~~")

        # Evaluate sparsely: at epoch 10, then every 10th epoch after 48.
        if epoch == 10 or (epoch > 48 and epoch % 10 == 9):
            valid_queue = queue.Queue(args.workers * 2)
            valid_loader = BaseDataLoader(valid_dataset, valid_queue,
                                          args.batch_size, 0)
            valid_loader.start()
            eval_loss, eval_cer = evaluate(model, valid_loader, valid_queue,
                                           device, args.max_len,
                                           args.batch_size)
            logger.info('Epoch %d (Evaluate) Loss %0.4f CER %0.4f' %
                        (epoch, eval_loss, eval_cer))
            valid_loader.join()
            nsml.report(False, step=epoch, train_epoch__loss=train_loss,
                        train_epoch__cer=train_cer, eval__loss=eval_loss,
                        eval__cer=eval_cer)
            best_model = (eval_loss < best_loss)
            nsml.save(args.save_name)
            if best_model:
                nsml.save('best')
                best_loss = eval_loss
def train_net(args):
    """Train the translation Transformer and log per-epoch losses to disk.

    Resumes from ``args.checkpoint`` when provided. Appends each epoch's
    training loss to ``loss_epoch.txt`` as it goes and rewrites ``loss.txt``
    with the full history at the end.
    """
    # Fix RNG seeds so runs are reproducible.
    torch.manual_seed(7)
    np.random.seed(7)

    checkpoint = args.checkpoint
    start_epoch = 0
    writer = SummaryWriter()

    if checkpoint is None:
        # Fresh model; vocab and special token ids come from Config.
        enc = Encoder(Config.vocab_size, args.n_layers_enc, args.n_head,
                      args.d_k, args.d_v, args.d_model, args.d_inner,
                      dropout=args.dropout, pe_maxlen=args.pe_maxlen)
        dec = Decoder(Config.sos_id, Config.eos_id, Config.vocab_size,
                      args.d_word_vec, args.n_layers_dec, args.n_head,
                      args.d_k, args.d_v, args.d_model, args.d_inner,
                      dropout=args.dropout,
                      tgt_emb_prj_weight_sharing=args.tgt_emb_prj_weight_sharing,
                      pe_maxlen=args.pe_maxlen)
        model = Transformer(enc, dec)
        optimizer = TransformerOptimizer(
            torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09))
    else:
        # Resume: the checkpoint stores model and optimizer objects whole.
        state = torch.load(checkpoint)
        start_epoch = state['epoch'] + 1
        model = state['model']
        optimizer = state['optimizer']

    model = model.to(Config.device)

    # pad_collate pads variable-length sequences within each batch.
    train_dataset = TranslateDataset()
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               collate_fn=pad_collate,
                                               shuffle=True,
                                               num_workers=args.num_workers)

    loss_history = []
    for epoch in range(start_epoch, args.epochs):
        train_loss = train(train_loader=train_loader, model=model,
                           optimizer=optimizer, epoch=epoch, logger=logger,
                           writer=writer)

        # Append immediately so progress survives an interrupted run.
        loss_text = str(train_loss)
        loss_history.append(loss_text)
        with open('loss_epoch.txt', 'a+') as f:
            f.write(loss_text + '\n')

        writer.add_scalar('epoch/train_loss', train_loss, epoch)
        writer.add_scalar('epoch/learning_rate', optimizer.lr, epoch)
        print('\nLearning rate: {}'.format(optimizer.lr))
        print('Step num: {}\n'.format(optimizer.step_num))

        save_checkpoint(epoch, model, optimizer, train_loss)

    # Full history in one file at the end.
    with open('loss.txt', 'w') as f:
        f.write('\n'.join(loss_history))
def main(args):
    """Build data loaders, select and construct the model, then train.

    The model family is chosen by ``args.structure``; vocabulary size and
    special token ids are attached to ``args`` for ``create_model``.

    Raises:
        ValueError: if ``args.structure`` is not one of the supported values.
    """
    token2idx, idx2token = load_vocab(args.vocab)
    args.vocab_size = len(token2idx)
    args.sos_id = token2idx['<sos>']
    args.eos_id = token2idx['<eos>']

    tr_dataset = AudioDataset(args.train_json, args.batch_size,
                              args.maxlen_in, args.maxlen_out,
                              batch_frames=args.batch_frames)
    cv_dataset = AudioDataset(args.valid_json, args.batch_size,
                              args.maxlen_in, args.maxlen_out,
                              batch_frames=args.batch_frames)
    # The datasets batch internally, so the DataLoader batch_size stays 1.
    tr_loader = AudioDataLoader(tr_dataset, batch_size=1, token2idx=token2idx,
                                label_type=args.label_type,
                                num_workers=args.num_workers,
                                shuffle=args.shuffle,
                                LFR_m=args.LFR_m, LFR_n=args.LFR_n)
    cv_loader = AudioDataLoader(cv_dataset, batch_size=1, token2idx=token2idx,
                                label_type=args.label_type,
                                num_workers=args.num_workers,
                                LFR_m=args.LFR_m, LFR_n=args.LFR_n)
    data = {'tr_loader': tr_loader, 'cv_loader': cv_loader}

    # Model family dispatch; imports are local so only the chosen variant
    # (and its solver) is loaded.
    if args.structure == 'transformer':
        from transformer.Transformer import Transformer
        from transformer.solver import Transformer_Solver as Solver
        model = Transformer.create_model(args)
    elif args.structure == 'transformer-ctc':
        from transformer.Transformer import CTC_Transformer as Transformer
        from transformer.solver import Transformer_CTC_Solver as Solver
        model = Transformer.create_model(args)
    elif args.structure == 'conv-transformer-ctc':
        from transformer.Transformer import Conv_CTC_Transformer as Transformer
        from transformer.solver import Transformer_CTC_Solver as Solver
        model = Transformer.create_model(args)
    elif args.structure == 'cif':
        from transformer.CIF_Model import CIF_Model
        from transformer.solver import CIF_Solver as Solver
        model = CIF_Model.create_model(args)
    else:
        # FIX: previously an unknown structure fell through and crashed later
        # with NameError on model/Solver; fail fast with a clear message.
        raise ValueError('unsupported structure: {}'.format(args.structure))

    print(model)
    model.cuda()

    # TransformerOptimizer wraps Adam with the warmup learning-rate schedule.
    optimizer = TransformerOptimizer(
        torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09),
        args.k, args.d_model, args.warmup_steps)

    solver = Solver(data, model, optimizer, args)
    solver.train()
eos_id, vocab_size, args.d_word_vec, args.n_layers_dec, args.n_head, args.d_k, args.d_v, args.d_model, args.d_inner, dropout=args.dropout, tgt_emb_prj_weight_sharing=args.tgt_emb_prj_weight_sharing, pe_maxlen=args.pe_maxlen) model = Transformer(encoder, decoder) optimizer = TransformerOptimizer( torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09), args.k, args.d_model, args.warmup_steps) print(args.k) print(args.d_model) print(args.warmup_steps) lr_list = [] for step_num in range(1, 50000): # print(step_num) lr_1 = k * init_lr * min(step_num**(-0.5), step_num * (warmup_steps**(-1.5))) optimizer.step() lr_2 = optimizer.lr # print(lr_1) # print(lr_2)
def __init__(self):
    """Resolve data paths, load the vocabulary, and build loaders, model and optimizer."""
    # Input paths are declared relative to this file's directory.
    base_dir = os.path.dirname(os.path.realpath(__file__))
    self.train_json = os.path.join(base_dir, self.train_json)
    self.valid_json = os.path.join(base_dir, self.valid_json)
    self.dict_txt = os.path.join(base_dir, self.dict_txt)

    self.char_list, self.sos_id, self.eos_id = process_dict(self.dict_txt)
    self.vocab_size = len(self.char_list)

    self.tr_dataset = AudioDataset(self.train_json, self.batch_size,
                                   self.maxlen_in, self.maxlen_out,
                                   batch_frames=self.batch_frames)
    self.cv_dataset = AudioDataset(self.valid_json, self.batch_size,
                                   self.maxlen_in, self.maxlen_out,
                                   batch_frames=self.batch_frames)
    # Datasets batch internally, hence DataLoader batch_size of 1.
    self.tr_loader = AudioDataLoader(self.tr_dataset, batch_size=1,
                                     num_workers=self.num_workers,
                                     shuffle=self.shuffle,
                                     LFR_m=self.LFR_m, LFR_n=self.LFR_n)
    self.cv_loader = AudioDataLoader(self.cv_dataset, batch_size=1,
                                     num_workers=self.num_workers,
                                     LFR_m=self.LFR_m, LFR_n=self.LFR_n)
    self.data = {'tr_loader': self.tr_loader, 'cv_loader': self.cv_loader}

    # Encoder input width scales with the LFR stacking factor.
    self.encoder = Encoder(self.d_input * self.LFR_m, self.n_layers_enc,
                           self.n_head, self.d_k, self.d_v, self.d_model,
                           self.d_inner, dropout=self.dropout,
                           pe_maxlen=self.pe_maxlen)
    self.decoder = Decoder(self.sos_id, self.eos_id, self.vocab_size,
                           self.d_word_vec, self.n_layers_dec, self.n_head,
                           self.d_k, self.d_v, self.d_model, self.d_inner,
                           dropout=self.dropout,
                           tgt_emb_prj_weight_sharing=self.tgt_emb_prj_weight_sharing,
                           pe_maxlen=self.pe_maxlen)

    # Per-epoch loss buffers, one slot per planned epoch.
    self.tr_loss = torch.Tensor(self.epochs)
    self.cv_loss = torch.Tensor(self.epochs)
    self.model = Transformer(self.encoder, self.decoder)
    self.optimizer = TransformerOptimizer(
        torch.optim.Adam(self.model.parameters(), betas=(0.9, 0.98),
                         eps=1e-09),
        self.k, self.d_model, self.warmup_steps)
    self._reset()
class SpeechTransformerTrainConfig:
    """Configuration and training harness for the speech Transformer.

    Class attributes hold the hyperparameters; ``__init__`` resolves input
    paths, loads the vocabulary, and builds loaders, model and optimizer.
    """

    # Low Frame Rate: stack LFR_m frames, skip LFR_n.
    LFR_m = 4
    LFR_n = 3
    # Network architecture - encoder.
    d_input = 80
    n_layers_enc = 6
    n_head = 8
    d_k = 64
    d_v = 64
    d_model = 512
    d_inner = 2048
    dropout = 0.1
    pe_maxlen = 5000
    d_word_vec = 512
    n_layers_dec = 6
    tgt_emb_prj_weight_sharing = 1
    label_smoothing = 0.1
    # Minibatch settings.
    shuffle = 1
    batch_size = 16
    batch_frames = 15000
    maxlen_in = 800
    maxlen_out = 150
    num_workers = 4
    # Optimizer (warmup schedule parameters).
    # NOTE(review): warmup_steps=1 is unusually small for a Transformer
    # warmup schedule — confirm it is intentional.
    k = 0.2
    warmup_steps = 1
    # Solver configuration.
    epochs = 5
    save_folder = "output_data"
    checkpoint = False
    continue_from = False
    model_path = 'final.pth.tar'
    print_freq = 10
    visdom = 0
    visdom_lr = 0
    visdom_epoch = 0
    visdom_id = 0
    # Input files; paths are relative to the directory of __file__.
    train_json = "input_data/train/data.json"
    valid_json = "input_data/dev/data.json"
    dict_txt = "input_data/lang_1char/train_chars.txt"

    def __init__(self):
        """Resolve paths, load vocab, build loaders, model and optimizer."""
        dir_path = os.path.dirname(os.path.realpath(__file__))
        self.train_json = os.path.join(dir_path, self.train_json)
        self.valid_json = os.path.join(dir_path, self.valid_json)
        self.dict_txt = os.path.join(dir_path, self.dict_txt)

        self.char_list, self.sos_id, self.eos_id = process_dict(self.dict_txt)
        self.vocab_size = len(self.char_list)

        self.tr_dataset = AudioDataset(self.train_json, self.batch_size,
                                       self.maxlen_in, self.maxlen_out,
                                       batch_frames=self.batch_frames)
        self.cv_dataset = AudioDataset(self.valid_json, self.batch_size,
                                       self.maxlen_in, self.maxlen_out,
                                       batch_frames=self.batch_frames)
        # Datasets batch internally, hence DataLoader batch_size of 1.
        self.tr_loader = AudioDataLoader(self.tr_dataset, batch_size=1,
                                         num_workers=self.num_workers,
                                         shuffle=self.shuffle,
                                         LFR_m=self.LFR_m, LFR_n=self.LFR_n)
        self.cv_loader = AudioDataLoader(self.cv_dataset, batch_size=1,
                                         num_workers=self.num_workers,
                                         LFR_m=self.LFR_m, LFR_n=self.LFR_n)
        self.data = {'tr_loader': self.tr_loader, 'cv_loader': self.cv_loader}

        # Encoder input width scales with the LFR stacking factor.
        self.encoder = Encoder(self.d_input * self.LFR_m, self.n_layers_enc,
                               self.n_head, self.d_k, self.d_v, self.d_model,
                               self.d_inner, dropout=self.dropout,
                               pe_maxlen=self.pe_maxlen)
        self.decoder = Decoder(self.sos_id, self.eos_id, self.vocab_size,
                               self.d_word_vec, self.n_layers_dec, self.n_head,
                               self.d_k, self.d_v, self.d_model, self.d_inner,
                               dropout=self.dropout,
                               tgt_emb_prj_weight_sharing=self.tgt_emb_prj_weight_sharing,
                               pe_maxlen=self.pe_maxlen)

        # Per-epoch loss buffers, one slot per planned epoch.
        self.tr_loss = torch.Tensor(self.epochs)
        self.cv_loss = torch.Tensor(self.epochs)
        self.model = Transformer(self.encoder, self.decoder)
        # NOTE(review): batches are moved to .cuda() in _run_one_epoch but the
        # model is not moved here — confirm it is moved to GPU elsewhere.
        self.optimizer = TransformerOptimizer(
            torch.optim.Adam(self.model.parameters(), betas=(0.9, 0.98),
                             eps=1e-09),
            self.k, self.d_model, self.warmup_steps)
        self._reset()

    def _reset(self):
        """Initialize best/previous validation loss trackers."""
        self.prev_val_loss = float("inf")
        self.best_val_loss = float("inf")
        self.halving = False

    def _run_one_epoch(self, cross_valid=False):
        """Run one pass over the train (or validation) set.

        Args:
            cross_valid: when True, iterate the validation loader and skip
                the backward/optimizer steps.

        Returns:
            Mean per-batch loss over the epoch.
        """
        total_loss = 0
        n_batches = 0
        data_loader = self.cv_loader if cross_valid else self.tr_loader
        # NOTE(review): validation is not wrapped in torch.no_grad(); the
        # graph is simply never back-propagated. Consider no_grad to save
        # memory — left unchanged to preserve behavior exactly.
        for padded_input, input_lengths, padded_target in data_loader:
            padded_input = padded_input.cuda()
            input_lengths = input_lengths.cuda()
            padded_target = padded_target.cuda()
            pred, gold = self.model(padded_input, input_lengths, padded_target)
            loss, _ = cal_performance(pred, gold,
                                      smoothing=self.label_smoothing)
            if not cross_valid:
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
            total_loss += loss.item()
            n_batches += 1
        # FIX: previously divided by the leftover loop index (i + 1), which
        # raised NameError on an empty loader, and computed an unused
        # non-pad word count every batch (dead work) — both removed; the
        # explicit batch counter yields the identical mean.
        return total_loss / n_batches

    def train(self, epoch=1):
        """Run one training epoch, then cross-validate and record losses."""
        self.model.train()
        tr_avg_loss = self._run_one_epoch()
        # Cross validation.
        self.model.eval()
        val_loss = self._run_one_epoch(cross_valid=True)
        self.tr_loss[epoch] = tr_avg_loss
        self.cv_loss[epoch] = val_loss
        if val_loss < self.best_val_loss:
            self.best_val_loss = val_loss