def evaluate(model, validate_loader, condition, cuda):
    """Evaluate the loss on part of the validation data during training.

    Args:
      model: torch.nn.Module
      validate_loader: torch.utils.data.DataLoader
      condition: bool, whether to use the global condition
      cuda: bool

    Returns:
      float, mean loss over the evaluated mini-batches
    """
    losses = []
    model.eval()

    for iteration, (batch_x, global_condition) in enumerate(validate_loader):

        if condition:
            global_condition = move_data_to_gpu(global_condition, cuda)
        else:
            global_condition = None

        batch_x = move_data_to_gpu(batch_x, cuda)

        # The input drops the last sample; the target covers only the output
        # width left after the model's receptive field.
        batch_input = batch_x[:, 0:-1]
        output_width = batch_input.shape[-1] - model.receptive_field + 1
        batch_target = batch_x[:, -output_width:]

        with torch.no_grad():
            batch_output = model(batch_input, global_condition)
            loss = _loss_func(batch_output, batch_target)

        losses.append(loss.data.cpu().numpy())

        # Evaluate at most ~100 mini-batches to keep validation fast
        if iteration == 100:
            break

    return np.mean(losses)
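# move_data_to_gpu() and _loss_func() are project helpers defined elsewhere.
# A minimal sketch of what they might look like is below; the bodies are
# assumptions for illustration, not the repository's actual implementations.
import numpy as np
import torch
import torch.nn.functional as F


def move_data_to_gpu(x, cuda):
    """Convert array-like data to a tensor and optionally move it to the GPU
    (assumed behaviour)."""
    x = torch.as_tensor(x)
    if cuda:
        x = x.cuda()
    return x


def _loss_func(output, target):
    """Cross entropy between predicted sample distributions and quantized
    targets (assumed form).

    output: (batch_size, quantize_bins, output_width)
    target: (batch_size, output_width), integer bin indices
    """
    return F.cross_entropy(output, target.long())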
def forward(model, generate_func, cuda, return_target):
    """Forward data to a model.

    Args:
      model: object
      generate_func: generate function
      cuda: bool
      return_target: bool

    Returns:
      dict, keys: 'audio_name', 'output'; optional key: 'target'
    """
    outputs = []
    audio_names = []

    if return_target:
        targets = []

    model.eval()

    # Evaluate on mini-batches
    for data in generate_func:

        if return_target:
            (batch_x, batch_y, batch_audio_names) = data
        else:
            (batch_x, batch_audio_names) = data

        batch_x = move_data_to_gpu(batch_x, cuda)

        # Predict
        batch_output = model(batch_x)

        # Collect results
        outputs.append(batch_output.data.cpu().numpy())
        audio_names.append(batch_audio_names)

        if return_target:
            targets.append(batch_y)

    # Use a named dict instead of shadowing the built-in `dict`
    result_dict = {}

    outputs = np.concatenate(outputs, axis=0)
    result_dict['output'] = outputs

    audio_names = np.concatenate(audio_names, axis=0)
    result_dict['audio_name'] = audio_names

    if return_target:
        targets = np.concatenate(targets, axis=0)
        result_dict['target'] = targets

    return result_dict
def forward(model, generate_func, cuda, has_target):
    """Forward data to a model.

    Args:
      model: object
      generate_func: generate function
      cuda: bool
      has_target: bool, True if generate_func yields (batch_x, batch_y),
          False if generate_func yields batch_x only

    Returns:
      (outputs, targets) if has_target else outputs
    """
    model.eval()

    outputs = []
    targets = []

    # Evaluate on mini-batches
    for data in generate_func:

        if has_target:
            (batch_x, batch_y) = data
            targets.append(batch_y)
        else:
            batch_x = data

        batch_x = move_data_to_gpu(batch_x, cuda)

        # Predict
        batch_output = model(batch_x)
        outputs.append(batch_output.data.cpu().numpy())

    # Concatenate in both branches so the return type is always an ndarray
    outputs = np.concatenate(outputs, axis=0)

    if has_target:
        targets = np.concatenate(targets, axis=0)
        return outputs, targets
    else:
        return outputs
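# Example of how the (outputs, targets) variant of forward() might be used
# during validation. generator.generate_validate() and the argmax-based
# accuracy are assumptions here, not confirmed by the code above.
def validate_accuracy(model, generator, cuda):
    outputs, targets = forward(model=model,
                               generate_func=generator.generate_validate(),
                               cuda=cuda,
                               has_target=True)

    # outputs: (audios_num, classes_num); targets: (audios_num,)
    predictions = outputs.argmax(axis=-1)
    return float((predictions == targets).mean())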
def train(args):

    logging.info('config=%s', json.dumps(vars(args)))

    # Arguments & parameters
    workspace = args.workspace
    cuda = args.cuda

    # Load model
    model_class, model_params = MODELS[args.model]
    model = model_class(**{k: args.model_params[k]
                           for k in model_params if k in args.model_params})

    if args.train_model is not None:
        logging.info('continue training ...')
        model_path = os.path.join(workspace, 'logs', get_filename(__file__),
                                  args.train_model)
        checkpoint = torch.load(model_path)
        model.load_state_dict(checkpoint['state_dict'])

    logging.info('sequence length: {}'.format(model.seq_len))

    if cuda:
        model.cuda()

    # Paths
    hdf5_path = os.path.join(workspace, 'data.h5')
    models_dir = os.path.join(workspace, 'models', get_filename(__file__))
    create_folder(models_dir)

    # Data generator
    generator = DataGenerator(hdf5_path=hdf5_path,
                              target_device=args.target_device,
                              train_house_list=args.train_house_list,
                              validate_house_list=args.validate_house_list,
                              batch_size=args.batch_size,
                              seq_len=model.seq_len,
                              width=args.width,
                              binary_threshold=args.binary_threshold,
                              balance_threshold=args.balance_threshold,
                              balance_positive=args.balance_positive)

    # Optimizer
    learning_rate = 1e-3
    optimizer = optim.Adam(model.parameters(), lr=learning_rate,
                           betas=(0.9, 0.999), eps=1e-08, weight_decay=0.)

    iteration = 0
    train_bgn_time = time.time()

    for (batch_x, batch_y) in generator.generate():

        # Stop training after 300k iterations
        if iteration > 1000 * 300:
            break

        # Evaluate
        if iteration % 1000 == 0:

            train_fin_time = time.time()

            tr_result_dict = evaluate(model=model,
                                      generator=generator,
                                      data_type='train',
                                      max_iteration=args.validate_max_iteration,
                                      cuda=cuda,
                                      binary=args.binary_threshold is not None)

            va_result_dict = evaluate(model=model,
                                      generator=generator,
                                      data_type='validate',
                                      max_iteration=args.validate_max_iteration,
                                      cuda=cuda,
                                      binary=args.binary_threshold is not None)

            logging.info('train: {}'.format(tr_result_dict))
            logging.info('validate: {}'.format(va_result_dict))

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info(
                'iteration: {}, train time: {:.3f} s, validate time: {:.3f} s, '
                'learning rate: {}'.format(
                    iteration, train_time, validate_time, learning_rate))

            logging.info('------------------------------------')

            train_bgn_time = time.time()

        # Reduce learning rate by 10% every 1000 iterations, down to 5e-5
        if iteration % 1000 == 0 and iteration > 0 and learning_rate > 5e-5:
            for param_group in optimizer.param_groups:
                learning_rate *= 0.9
                param_group['lr'] = learning_rate

        batch_x = move_data_to_gpu(batch_x, cuda)
        batch_y = move_data_to_gpu(batch_y, cuda)

        # Forward
        model.train()
        output = model(batch_x)

        # Loss
        if args.binary_threshold is not None:
            loss = loss_func_binary(output, batch_y)
        else:
            loss = loss_func(output, batch_y)

        # Backward
        optimizer.zero_grad()
        loss.backward()

        if args.max_norm is not None:
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=args.max_norm)

        optimizer.step()

        # Save model. The arithmetic keeps only the five checkpoints leading
        # up to each multiple of 100k iterations (96k-100k, 196k-200k, ...).
        if (iteration > 1) and (iteration % 1000 == 0) and \
                ((iteration // 1000 + 4) //
                 (((iteration // 1000 - 1) // 100 + 1) * 100) == 1):

            save_out_dict = {'iteration': iteration,
                             'state_dict': model.state_dict(),
                             'optimizer': optimizer.state_dict()}

            save_out_path = args.basename + '_{}_{}_iter_{}_wd_{}_sl_{}.tar'.format(
                args.target_device, args.model, iteration, args.width,
                model.seq_len)

            create_folder(os.path.dirname(save_out_path))
            torch.save(save_out_dict, save_out_path)
            logging.info('Save model to {}'.format(save_out_path))

        iteration += 1
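# Sketch of reloading one of the checkpoints that train() saves above, using
# the same MODELS registry; load_checkpoint and checkpoint_path are
# hypothetical names, not part of the original script.
def load_checkpoint(args, checkpoint_path):
    model_class, model_params = MODELS[args.model]
    model = model_class(**{k: args.model_params[k]
                           for k in model_params if k in args.model_params})

    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()
    return model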
def train(args):

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    subdir = args.subdir
    workspace = args.workspace
    filename = args.filename
    validate = args.validate
    holdout_fold = args.holdout_fold
    mini_data = args.mini_data
    cuda = args.cuda

    batch_size = args.batch_size
    labels = config.labels

    if 'mobile' in subdir:
        devices = ['a', 'b', 'c']
    else:
        devices = ['a']

    classes_num = len(labels)

    # Paths
    if mini_data:
        hdf5_path = os.path.join(workspace, 'features', 'logmel', subdir,
                                 'mini_development.h5')
    else:
        hdf5_path = os.path.join(workspace, 'features', 'logmel', subdir,
                                 'development.h5')

    if validate:
        dev_train_csv = os.path.join(dataset_dir, subdir, 'evaluation_setup',
                                     'fold{}_train.txt'.format(holdout_fold))
        dev_validate_csv = os.path.join(dataset_dir, subdir, 'evaluation_setup',
                                        'fold{}_evaluate.txt'.format(holdout_fold))
        models_dir = os.path.join(workspace, 'models', subdir, filename,
                                  'holdout_fold={}'.format(holdout_fold))
    else:
        dev_train_csv = None
        dev_validate_csv = None
        models_dir = os.path.join(workspace, 'models', subdir, filename,
                                  'full_train')

    create_folder(models_dir)

    # Model
    model = Model(classes_num)

    # Move model to GPU
    if cuda:
        model.cuda()

    # Data generator
    generator = DataGenerator(hdf5_path=hdf5_path,
                              batch_size=batch_size,
                              dev_train_csv=dev_train_csv,
                              dev_validate_csv=dev_validate_csv)

    # Optimizer, used to minimize the loss
    optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999),
                           eps=1e-08, weight_decay=0.)

    train_bgn_time = time.time()

    # Training loop
    for (iteration, (batch_x, batch_y)) in enumerate(generator.generate_train()):

        # Evaluate accuracy and loss every 100 iterations and log the results
        if iteration % 100 == 0:

            train_fin_time = time.time()

            (tr_acc, tr_loss) = evaluate(model=model,
                                         generator=generator,
                                         data_type='train',
                                         devices=devices,
                                         max_iteration=None,
                                         cuda=cuda)

            logging.info('tr_acc: {:.3f}, tr_loss: {:.3f}'.format(
                tr_acc, tr_loss))

            if validate:
                (va_acc, va_loss) = evaluate(model=model,
                                             generator=generator,
                                             data_type='validate',
                                             devices=devices,
                                             max_iteration=None,
                                             cuda=cuda)

                logging.info('va_acc: {:.3f}, va_loss: {:.3f}'.format(
                    va_acc, va_loss))

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info(
                'iteration: {}, train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(iteration, train_time, validate_time))

            logging.info('------------------------------------')

            train_bgn_time = time.time()

        # Save model every 1000 iterations
        if iteration % 1000 == 0 and iteration > 0:

            save_out_dict = {'iteration': iteration,
                             'state_dict': model.state_dict(),
                             'optimizer': optimizer.state_dict()}

            save_out_path = os.path.join(
                models_dir, 'md_{}_iters.tar'.format(iteration))

            torch.save(save_out_dict, save_out_path)
            logging.info('Model saved to {}'.format(save_out_path))

        # Reduce the learning rate by 10% every 200 iterations; decaying the
        # rate helps convergence and makes poor local optima less likely
        if iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move mini-batch to GPU
        batch_x = move_data_to_gpu(batch_x, cuda)
        batch_y = move_data_to_gpu(batch_y, cuda)

        # Forward
        model.train()
        batch_output = model(batch_x)

        # Loss
        loss = F.nll_loss(batch_output, batch_y)

        # Backward, update parameters
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop training after 10000 iterations (adjust as needed)
        if iteration == 10000:
            break
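# The evaluate() called above returns (accuracy, loss). A minimal sketch,
# assuming the (outputs, targets) forward() helper from the earlier listing
# and a generate_validate() method on the generator (both assumptions):
def evaluate(model, generator, data_type, devices, max_iteration, cuda):
    outputs, targets = forward(model=model,
                               generate_func=generator.generate_validate(
                                   data_type=data_type,
                                   devices=devices,
                                   max_iteration=max_iteration),
                               cuda=cuda,
                               has_target=True)

    # Model outputs are log-probabilities, matching the F.nll_loss training
    loss = F.nll_loss(torch.as_tensor(outputs),
                      torch.as_tensor(targets).long()).item()
    accuracy = float((outputs.argmax(axis=-1) == targets).mean())
    return accuracy, loss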
def train(args):

    # Arguments & parameters
    dataset = args.dataset
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    filename = args.filename
    batch_size = args.batch_size  # Use one audio clip per mini-batch; must
                                  # be 1 if audio clips have different lengths.
    condition = args.condition
    cuda = args.cuda

    quantize_bins = config.quantize_bins
    dilations = config.dilations

    # Paths
    models_dir = os.path.join(workspace, 'models',
                              'dataset={}'.format(dataset), filename,
                              'condition={}'.format(condition))
    create_folder(models_dir)

    # Data generator
    Dataset = get_dataset(dataset)

    train_dataset = Dataset(dataset_dir, data_type='train')
    validate_dataset = Dataset(dataset_dir, data_type='validate')

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=1,
                                               pin_memory=True)

    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=True,
                                                  num_workers=1,
                                                  pin_memory=True)

    # Model
    model = WaveNet(
        dilations,
        residual_channels=config.residual_channels,
        dilation_channels=config.dilation_channels,
        skip_channels=config.skip_channels,
        quantize_bins=quantize_bins,
        global_condition_channels=config.global_condition_channels,
        global_condition_cardinality=Dataset.global_condition_cardinality,
        use_cuda=cuda)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999),
                           eps=1e-08, weight_decay=0.)

    train_bgn_time = time.time()
    iteration = 0

    while True:
        for (batch_x, global_condition) in train_loader:
            '''batch_x: (batch_size, seq_len)
            global_condition: (batch_size,)
            '''
            print('iteration: {}, input size: {}'.format(
                iteration, batch_x.shape))

            # Evaluate
            if iteration % 1000 == 0:

                train_fin_time = time.time()
                evaluate_bgn_time = time.time()

                loss = evaluate(model, validate_loader, condition, cuda)

                print('-----------------')
                logging.info(
                    'iteration: {}, loss: {:.3f}, train time: {:.3f} s, '
                    'validate time: {:.3f} s'.format(
                        iteration, loss, train_fin_time - train_bgn_time,
                        time.time() - evaluate_bgn_time))

                train_bgn_time = time.time()

            # Save model every 10000 iterations
            if iteration % 10000 == 0:

                save_out_dict = {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict()
                }

                save_out_path = os.path.join(
                    models_dir, 'md_{}_iters.tar'.format(iteration))

                torch.save(save_out_dict, save_out_path)
                logging.info('Save model to {}'.format(save_out_path))

            # Move data to GPU
            if condition:
                global_condition = move_data_to_gpu(global_condition, cuda)
            else:
                global_condition = None

            batch_x = move_data_to_gpu(batch_x, cuda)

            # Prepare input and target data: the input drops the last sample,
            # and the target covers the output width left by the receptive field
            batch_input = batch_x[:, 0:-1]
            output_width = batch_input.shape[-1] - model.receptive_field + 1
            batch_target = batch_x[:, -output_width:]

            # Forward
            model.train()
            batch_output = model(batch_input, global_condition)

            loss = _loss_func(batch_output, batch_target)

            # Backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            print('loss: {:.3f}'.format(loss.data.cpu().numpy()))

            iteration += 1
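# Hypothetical command-line entry point for the WaveNet train() above. The
# flag names mirror the attributes read from args; the real script may define
# these differently.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', required=True)
    parser.add_argument('--dataset_dir', required=True)
    parser.add_argument('--workspace', required=True)
    parser.add_argument('--filename', default='main')
    parser.add_argument('--batch_size', type=int, default=1)
    parser.add_argument('--condition', action='store_true')
    parser.add_argument('--cuda', action='store_true')

    train(parser.parse_args())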