# train loader; the head of this call is truncated in the source, so the
# variable name is taken from the prefetcher call below and shuffle=True is assumed
training_data = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True,
    num_workers=32, pin_memory=True)
dev_dataset = AudioDataset(config.data, 'dev')
dev_data = torch.utils.data.DataLoader(
    dev_dataset, batch_size=batch_size, shuffle=False,
    num_workers=16, pin_memory=True)

steps = len(train_dataset)

# Optimizer setup
opt = torch.optim.Adam(model.parameters(), lr=learning_rate)
param_group = opt.param_groups

n_params, enc, dec = count_parameters(model)
print('# the number of parameters in the whole model: %d' % n_params)

summary_writer = SummaryWriter(os.path.join(home_dir, 'logs/'))
old_wer = 100

for epoch in range(epochs):
    i = 0
    total_loss = 0
    nums_batchs = len(train_dataset.wav_ids) // batch_size
    prefetcher = data_prefetcher(training_data)
    while True:
        i += 1
        inputs_x, targets_y, origin_length, ctc_length, targets_length = prefetcher.next()
        if inputs_x is None:
            break
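# `data_prefetcher` is used above but not defined in this snippet. Below is a
# minimal sketch of the usual CUDA-stream prefetching pattern the call site
# suggests: the next batch is copied to the GPU on a side stream while the
# current step runs. The five-element batch layout and the None sentinel are
# assumptions taken from the loop above, not the author's actual code.
class data_prefetcher:
    def __init__(self, loader):
        self.loader = iter(loader)
        self.stream = torch.cuda.Stream()
        self.preload()

    def preload(self):
        try:
            self.next_batch = next(self.loader)
        except StopIteration:
            self.next_batch = None
            return
        with torch.cuda.stream(self.stream):
            # async host-to-device copy; effective because the DataLoader
            # above was created with pin_memory=True
            self.next_batch = [t.cuda(non_blocking=True) for t in self.next_batch]

    def next(self):
        # make sure the copy issued on the side stream has finished
        torch.cuda.current_stream().wait_stream(self.stream)
        batch = self.next_batch
        if batch is None:
            # mirror the `inputs_x is None` sentinel checked by the loop above
            return None, None, None, None, None
        self.preload()
        return batch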
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-config', type=str, default='config/aishell.yaml')
    parser.add_argument('-log', type=str, default='train.log')
    opt = parser.parse_args()

    configfile = open(opt.config)
    config = AttrDict(yaml.load(configfile, Loader=yaml.FullLoader))

    exp_name = os.path.join('aishell/rnnt-model')
    if not os.path.isdir(exp_name):
        os.makedirs(exp_name)
    logger = init_logger(os.path.join(exp_name, opt.log))

    shutil.copyfile(opt.config, os.path.join(exp_name, 'config.yaml'))
    logger.info('Save config info.')

    # Test dataset
    test_dataset = AudioDataset(config.data, 'test')
    validate_data = torch.utils.data.DataLoader(
        test_dataset, batch_size=config.data.batch_size,
        shuffle=False, num_workers=4)
    logger.info('Load Test Set!')

    if config.training.num_gpu > 0:
        torch.cuda.manual_seed(config.training.seed)
        torch.backends.cudnn.deterministic = True
    else:
        torch.manual_seed(config.training.seed)
    logger.info('Set random seed: %d' % config.training.seed)

    model = Transducer(config.model)

    if config.training.load_model:
        checkpoint = torch.load(config.training.load_model)
        model.encoder.load_state_dict(checkpoint['encoder'])
        model.decoder.load_state_dict(checkpoint['decoder'])
        model.joint.load_state_dict(checkpoint['joint'])
        logger.info('Loaded model from %s' % config.training.load_model)
    elif config.training.load_encoder or config.training.load_decoder:
        if config.training.load_encoder:
            checkpoint = torch.load(config.training.load_encoder)
            model.encoder.load_state_dict(checkpoint['encoder'])
            logger.info('Loaded encoder from %s' % config.training.load_encoder)
        if config.training.load_decoder:
            checkpoint = torch.load(config.training.load_decoder)
            model.decoder.load_state_dict(checkpoint['decoder'])
            logger.info('Loaded decoder from %s' % config.training.load_decoder)

    if config.training.num_gpu > 0:
        model = model.cuda()
        if config.training.num_gpu > 1:
            device_ids = list(range(config.training.num_gpu))
            model = torch.nn.DataParallel(model, device_ids=device_ids)
        logger.info('Loaded the model to %d GPUs' % config.training.num_gpu)

    n_params, enc, dec = count_parameters(model)
    logger.info('# the number of parameters in the whole model: %d' % n_params)
    logger.info('# the number of parameters in the Encoder: %d' % enc)
    logger.info('# the number of parameters in the Decoder: %d' % dec)
    logger.info('# the number of parameters in the JointNet: %d' % (n_params - dec - enc))

    cer = test(config, model, test_dataset, validate_data, logger)
    logger.info('# Test CER: %.5f%%' % cer)
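# `count_parameters` is referenced throughout but not shown. A plausible
# sketch matching the (total, encoder, decoder) return signature used above;
# unwrapping `.module` for DataParallel-wrapped models is an assumption:
def count_parameters(model):
    m = model.module if hasattr(model, 'module') else model
    n_params = sum(p.numel() for p in m.parameters())
    enc = sum(p.numel() for p in m.encoder.parameters())
    dec = sum(p.numel() for p in m.decoder.parameters())
    return n_params, enc, dec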
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-config', type=str, default='config/aishell.yaml')
    parser.add_argument('-log', type=str, default='train.log')
    parser.add_argument('-mode', type=str, default='retrain')
    opt = parser.parse_args()

    configfile = open(opt.config)
    config = AttrDict(yaml.load(configfile, Loader=yaml.FullLoader))

    exp_name = os.path.join('egs', config.data.name, 'exp', config.training.save_model)
    if not os.path.isdir(exp_name):
        os.makedirs(exp_name)
    logger = init_logger(os.path.join(exp_name, opt.log))

    shutil.copyfile(opt.config, os.path.join(exp_name, 'config.yaml'))
    logger.info('Save config info.')

    num_workers = config.training.num_gpu * 2
    train_dataset = AudioDataset(config.data, 'train')
    training_data = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.data.batch_size * config.training.num_gpu,
        shuffle=config.data.shuffle, num_workers=num_workers)
    logger.info('Load Train Set!')

    dev_dataset = AudioDataset(config.data, 'dev')
    validate_data = torch.utils.data.DataLoader(
        dev_dataset,
        batch_size=config.data.batch_size * config.training.num_gpu,
        shuffle=False, num_workers=num_workers)
    logger.info('Load Dev Set!')

    if config.training.num_gpu > 0:
        torch.cuda.manual_seed(config.training.seed)
        torch.backends.cudnn.deterministic = True
    else:
        torch.manual_seed(config.training.seed)
    logger.info('Set random seed: %d' % config.training.seed)

    model = Transducer(config.model)

    if config.training.load_model:
        checkpoint = torch.load(config.training.load_model)
        model.encoder.load_state_dict(checkpoint['encoder'])
        model.decoder.load_state_dict(checkpoint['decoder'])
        model.joint.load_state_dict(checkpoint['joint'])
        logger.info('Loaded model from %s' % config.training.load_model)
    elif config.training.load_encoder or config.training.load_decoder:
        if config.training.load_encoder:
            checkpoint = torch.load(config.training.load_encoder)
            model.encoder.load_state_dict(checkpoint['encoder'])
            logger.info('Loaded encoder from %s' % config.training.load_encoder)
        if config.training.load_decoder:
            checkpoint = torch.load(config.training.load_decoder)
            model.decoder.load_state_dict(checkpoint['decoder'])
            logger.info('Loaded decoder from %s' % config.training.load_decoder)

    if config.training.num_gpu > 0:
        model = model.cuda()
        if config.training.num_gpu > 1:
            device_ids = list(range(config.training.num_gpu))
            model = torch.nn.DataParallel(model, device_ids=device_ids)
        logger.info('Loaded the model to %d GPUs' % config.training.num_gpu)

    n_params, enc, dec = count_parameters(model)
    logger.info('# the number of parameters in the whole model: %d' % n_params)
    logger.info('# the number of parameters in the Encoder: %d' % enc)
    logger.info('# the number of parameters in the Decoder: %d' % dec)
    logger.info('# the number of parameters in the JointNet: %d' % (n_params - dec - enc))

    optimizer = Optimizer(model.parameters(), config.optim)
    logger.info('Created a %s optimizer.' % config.optim.type)
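# The `Optimizer` class wraps a torch optimizer and owns the learning-rate
# schedule the training loop relies on (`.lr`, `decay_lr()`, state dicts). A
# minimal sketch under assumed config fields: `config.optim.lr` and
# `config.optim.decay_ratio` are not shown in the snippet and are hypothetical.
class Optimizer:
    def __init__(self, parameters, config):
        self.lr = config.lr
        self.decay_ratio = config.decay_ratio
        if config.type == 'adam':
            self.optimizer = torch.optim.Adam(parameters, lr=self.lr)
        else:
            self.optimizer = torch.optim.SGD(parameters, lr=self.lr)

    def step(self):
        self.optimizer.step()

    def zero_grad(self):
        self.optimizer.zero_grad()

    def decay_lr(self):
        # multiply the learning rate by a fixed ratio on each call
        self.lr *= self.decay_ratio
        for group in self.optimizer.param_groups:
            group['lr'] = self.lr

    def state_dict(self):
        return self.optimizer.state_dict()

    def load_state_dict(self, state_dict):
        self.optimizer.load_state_dict(state_dict)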
    if opt.mode == 'continue':
        # resuming assumes config.training.load_model was set above, so that
        # `checkpoint` is defined and holds the saved optimizer state
        optimizer.load_state_dict(checkpoint['optimizer'])
        start_epoch = checkpoint['epoch']
        logger.info('Load Optimizer State!')
    else:
        start_epoch = 0

    # create a visualizer
    if config.training.visualization:
        visualizer = SummaryWriter(os.path.join(exp_name, 'log'))
        logger.info('Created a visualizer.')
    else:
        visualizer = None

    for epoch in range(start_epoch, config.training.epochs):
        train(epoch, config, model, training_data, optimizer, logger, visualizer)

        if config.training.eval_or_not:
            _ = eval(epoch, config, model, validate_data, logger, visualizer)

        save_name = os.path.join(
            exp_name, '%s.epoch%d.chkpt' % (config.training.save_model, epoch))
        save_model(model, optimizer, config, save_name)
        logger.info('Epoch %d model has been saved.' % epoch)

        if epoch >= config.optim.begin_to_adjust_lr:
            optimizer.decay_lr()
            # early stop
            if optimizer.lr < 1e-6:
                logger.info('The learning rate is too low to train.')
                break
            logger.info('Epoch %d update learning rate: %.6f' % (epoch, optimizer.lr))

    logger.info('The training process is OVER!')
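# `save_model` is referenced in the loop above but not shown. A sketch that
# writes the same keys the loading code reads ('encoder', 'decoder', 'joint',
# 'optimizer', 'epoch'). Taking `epoch` as an explicit argument is an
# assumption; the call in the loop omits it, so the real implementation must
# record the epoch some other way.
def save_model(model, optimizer, config, save_name, epoch=0):
    m = model.module if hasattr(model, 'module') else model
    checkpoint = {
        'encoder': m.encoder.state_dict(),
        'decoder': m.decoder.state_dict(),
        'joint': m.joint.state_dict(),
        'optimizer': optimizer.state_dict(),
        'epoch': epoch,
    }
    torch.save(checkpoint, save_name)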