Example #1
from multiprocessing import Pool

from torch.utils.data import DataLoader

# Project-specific helpers (make_identical_dir_structure, Datafolder_soundfiles,
# walk_dir, format_paths, SubsetSampler, compute_mask_batch, clean_sample_) are
# assumed to be imported from elsewhere in the project.


def clean_Datafolder(input_dir,
                     output_dir,
                     model,
                     batch_size,
                     cuda=False,
                     samples=None):
    # Load dataset
    make_identical_dir_structure(input_dir, output_dir)
    test_set = Datafolder_soundfiles(y_paths=walk_dir(input_dir),
                                     transform=model.transform)
    output_paths = format_paths(test_set.y_paths,
                                input_dir,
                                output_dir,
                                extention='.wav')

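    # Optionally restrict the run to a subset of files: a slice selects a
    # contiguous range of the dataset, anything else is treated as an explicit
    # list of indices.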
    sampler = None
    if samples is not None:
        if isinstance(samples, slice):
            sampler = SubsetSampler(indices=range(test_set.length),
                                    slice_=samples)
        else:
            sampler = SubsetSampler(indices=samples)

    test_data_loader = DataLoader(test_set,
                                  batch_size=batch_size,
                                  num_workers=2,
                                  pin_memory=cuda,
                                  sampler=sampler)

    if cuda:
        model.cuda()
    model.eval()

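    # Write cleaned samples from a small worker pool so file output can overlap
    # with mask computation for the next batch.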
    pool = Pool(processes=2)
    jobs = []
    for indices, (data, Y_m, Y_a, length) in test_data_loader:
        try:
            mask = compute_mask_batch(model, data, cuda=cuda)
        except Exception as error:
            # Report every file in the failed batch instead of silently swallowing the error
            for index in indices:
                print(index, output_paths[index], 'failed:', error)
        else:
            mask, Y_m, Y_a, length = (k.numpy()
                                      for k in (mask, Y_m, Y_a, length))

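            # Wait for the previous batch's asynchronous writes before queueing new ones.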
            for job in jobs:
                job.wait()
            jobs = []
            for i, index in enumerate(indices):
                # Run clean_sample_ asynchronously in the pool; the function and its
                # arguments are passed separately (instead of calling it here) so the
                # work actually happens in the worker processes.
                jobs.append(
                    pool.apply_async(
                        clean_sample_,
                        (model, mask[i], Y_m[i], Y_a[i],
                         length[i], output_paths[index])))
    pool.close()
    pool.join()
    print('Finished cleaning', input_dir)
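
A minimal usage sketch, not from the original source: the checkpoint path, directories, and sample slice below are hypothetical, and it assumes `model` is a trained network exposing the `.transform` expected by `Datafolder_soundfiles`.

import torch

model = torch.load('checkpoints/denoiser.pt')  # hypothetical checkpoint path
clean_Datafolder('data/noisy', 'data/cleaned', model,
                 batch_size=8,
                 cuda=torch.cuda.is_available(),
                 samples=slice(0, 100))  # optional: only the first 100 files
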
Example #2
import time

import numpy as np
from torch.utils.data import DataLoader


def train():
    """
    Run one training epoch over `training_set` and append the epoch
    statistics to the log file at `logpath`.
    """
    model.train()
    epoch_loss, t0 = [], time.time()

    training_data_loader = DataLoader(
        training_set, batch_size=batch_size, num_workers=2, pin_memory=cuda,
        sampler=SubsetSampler(indices=sample_indecies))

    for i_batch, (indices, (data, targetY, targetX)) in enumerate(training_data_loader, 1):
        if cuda:
            data = data.cuda(non_blocking=True)
            targetY = targetY.cuda(non_blocking=True)
            targetX = targetX.cuda(non_blocking=True)

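        # The model predicts a mask; the loss compares the masked targetY against targetX.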
        optimizer.zero_grad()
        mask = model(data)  # prediction
        loss = criterion(apply_mask(mask, targetY), targetX)
        epoch_loss.append(loss.item())
        loss.backward()
        optimizer.step()
        print("===> Epoch {:2} {:4.1f}% Loss: {:.4e}".format(
            epoch, i_batch / batch_per_epoch * 100, loss.item()))

    # Each batch loss is an empirical mean over batch_size i.i.d. samples, so the
    # per-sample standard deviation is estimated as std(batch losses) * sqrt(batch_size).
    loss, loss_std, t = (np.mean(epoch_loss),
                         np.std(epoch_loss) * batch_size**.5,
                         int(time.time() - t0))
    print("Epoch {} Complete: Avg. Loss: {:.4e} (std {:.4e}) in {} min".format(
        epoch, loss, loss_std, int(t / 60)))
    with open(logpath, 'a') as logfile:
        print(epoch, loss, loss_std, t, sep=',', end=',', file=logfile)
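
train() reads several module-level names that this snippet does not define. A minimal setup sketch, assuming hypothetical values throughout (MaskEstimator, the dataset construction, learning rate, epoch count, and log path are illustrative, not from the original source):

import torch
import torch.nn as nn
import torch.optim as optim

cuda = torch.cuda.is_available()
batch_size = 16
training_set = Datafolder_soundfiles(...)   # project dataset yielding (index, (data, targetY, targetX)); arguments elided
sample_indecies = list(range(training_set.length))
batch_per_epoch = len(sample_indecies) / batch_size
model = MaskEstimator()                     # hypothetical network class
if cuda:
    model.cuda()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
logpath = 'training_log.csv'                # hypothetical log location

for epoch in range(1, 101):                 # illustrative epoch count
    train()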