def clean_Datafolder(input_dir, output_dir, model, batch_size, cuda=False, samples=None):
    """Apply ``model`` to the sound files found under ``input_dir``.

    Input files are listed with ``walk_dir(input_dir)`` and wrapped in a
    ``Datafolder_soundfiles`` dataset using ``model.transform``.  The output
    path of each file is computed by ``format_paths`` with a ``.wav``
    extension.  For every batch a mask is computed with
    ``compute_mask_batch`` and each sample is handed to ``clean_sample_`` in
    a two-process pool.

    Args:
        input_dir: Directory that is walked for input files.
        output_dir: Directory passed to ``make_identical_dir_structure`` and
            ``format_paths`` (presumably the root the cleaned files are
            written under -- the writing itself happens in ``clean_sample_``).
        model: Object providing ``transform``, ``eval()`` and, when ``cuda``
            is true, ``cuda()``.
        batch_size: Batch size given to the ``DataLoader``.
        cuda: When true the model is moved with ``model.cuda()``, the loader
            pins memory and the flag is forwarded to ``compute_mask_batch``.
        samples: ``None`` to process every file, a ``slice`` applied to
            ``range(test_set.length)``, or any other value that is passed to
            ``SubsetSampler`` as the indices to process.

    Batches whose mask computation raises, and samples whose pool job raises,
    are reported on stdout as ``failed`` and skipped; processing continues.
    """
    # Load dataset
    make_identical_dir_structure(input_dir, output_dir)
    test_set = Datafolder_soundfiles(y_paths=walk_dir(input_dir), transform=model.transform)
    output_paths = format_paths(test_set.y_paths, input_dir, output_dir, extention='.wav')

    sampler = None
    if samples is not None:
        if isinstance(samples, slice):
            sampler = SubsetSampler(indices=range(test_set.length), slice_=samples)
        else:
            sampler = SubsetSampler(indices=samples)

    test_data_loader = DataLoader(test_set, batch_size=batch_size, num_workers=2,
                                  pin_memory=cuda, sampler=sampler)

    if cuda:
        model.cuda()
    model.eval()

    pool = Pool(processes=2)
    jobs = []
    try:
        for (indexs, (data, Y_m, Y_a, length)) in test_data_loader:
            try:
                mask = compute_mask_batch(model, data, cuda=cuda)
            except Exception as err:
                # Best effort: report the whole batch and move on to the next one.
                for index in indexs:
                    print(index, output_paths[index], 'failed', repr(err))
                continue

            mask, Y_m, Y_a, length = (k.numpy() for k in (mask, Y_m, Y_a, length))

            # The previous batch is only collected now, so its jobs ran while
            # the mask of the current batch was being computed.
            _collect_clean_jobs(jobs)
            jobs = []
            for I, index in enumerate(indexs):
                # Pass the callable and its arguments separately; calling
                # clean_sample_(...) here would run it in this process and
                # submit its return value to the pool instead.
                # NOTE(review): the arguments (including ``model``) are pickled
                # for the worker process -- confirm the model survives that,
                # in particular when ``cuda`` is true.
                job = pool.apply_async(
                    clean_sample_,
                    (model, mask[I], Y_m[I], Y_a[I], length[I], output_paths[index]))
                jobs.append((index, output_paths[index], job))

        # Collect the jobs of the last successful batch.
        _collect_clean_jobs(jobs)
    finally:
        # Always shut the pool down, also when the loop above raised.
        pool.close()
        pool.join()
    print('Finished clearning', input_dir)


def _collect_clean_jobs(jobs):
    """Wait for each ``(index, path, job)`` entry and print the ones that failed."""
    for index, path, job in jobs:
        try:
            # get() re-raises an exception from the worker; wait() would hide it.
            job.get()
        except Exception as err:
            print(index, path, 'failed', repr(err))
def train():
    """Train ``model`` for one pass over the sampled training data.

    Takes no arguments and returns nothing; it works on module-level names:
    ``model``, ``optimizer``, ``criterion``, ``apply_mask``, ``training_set``,
    ``sample_indecies``, ``batch_size``, ``batch_per_epoch``, ``cuda``,
    ``epoch`` and ``logpath``.

    For every batch the model output is used as a mask: the loss is
    ``criterion(apply_mask(mask, targetY), targetX)``.  Progress is printed
    per batch, and after the last batch the epoch number, mean loss, scaled
    loss deviation and elapsed seconds are appended to ``logpath`` as
    comma-separated values (terminated by a comma, not a newline).
    """
    model.train()
    batch_losses = []
    started = time.time()

    training_data_loader = DataLoader(
        training_set, batch_size=batch_size, num_workers=2, pin_memory=cuda,
        sampler=SubsetSampler(indices=sample_indecies))

    for i_batch, (_, (data, targetY, targetX)) in enumerate(training_data_loader, 1):
        # No ``Variable`` wrapping: since PyTorch 0.4 tensors carry autograd
        # state themselves.
        if cuda:
            # ``non_blocking`` is the current name of the former ``async``
            # argument, which is a reserved word from Python 3.7 on.
            data = data.cuda(non_blocking=True)
            targetY = targetY.cuda(non_blocking=True)
            targetX = targetX.cuda(non_blocking=True)

        optimizer.zero_grad()
        mask = model(data)  # prediction
        loss = criterion(apply_mask(mask, targetY), targetX)
        # ``item()`` extracts the Python float; indexing a 0-dim tensor with
        # ``[0]`` is an error in current PyTorch.
        batch_loss = loss.item()
        batch_losses.append(batch_loss)
        loss.backward()
        optimizer.step()

        print("===> Epoch {:2} {:4.1f}% Loss: {:.4e}".format(
            epoch, i_batch / batch_per_epoch * 100, batch_loss))

    # Assume each batch loss is the empirical mean over the batch and that
    # samples are i.i.d., so the deviation is scaled by sqrt(batch_size).
    mean_loss = np.mean(batch_losses)
    loss_std = np.std(batch_losses) * batch_size ** .5
    elapsed = int(time.time() - started)

    print("Epoch {} Complete: Avg. Loss: {:.4e} {:.4e} {}".format(
        epoch, mean_loss, loss_std, int(elapsed / 60)))
    # Close the log file deterministically instead of leaking the handle.
    with open(logpath, 'a') as log_file:
        print(epoch, mean_loss, loss_std, elapsed, sep=',', end=',', file=log_file)