Example #1
    def __call__(self, noise, clean1, clean2, face1, face2):
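        # Audio-visual forward pass for two speakers: convert inputs to
        # float32, compress the spectrograms, estimate one mask per speaker,
        # apply the masks to the noisy mixture, and compute the loss against
        # the concatenated clean targets.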

        noise = xp.asarray(noise).astype(xp.float32)
        clean1 = xp.asarray(clean1).astype(xp.float32)
        clean2 = xp.asarray(clean2).astype(xp.float32)
        face1 = xp.asarray(face1).astype(xp.float32)[:, :, :, xp.newaxis]
        face2 = xp.asarray(face2).astype(xp.float32)[:, :, :, xp.newaxis]

        clean = xp.concatenate((clean1, clean2), axis=3)

        compressed_noise, _ = op.compress_audio(noise)
        compressed_clean, _ = op.compress_audio(clean)

        mask1, mask2 = self.estimate_mask(spec=compressed_noise,
                                          face1=face1,
                                          face2=face2)

        separated1 = op.mul(mask1, compressed_noise)
        separated2 = op.mul(mask2, compressed_noise)

        separated = F.concat((separated1, separated2),
                             axis=3)  # (6, 2, 301, 514)

        loss = evaluate_loss(self, separated, compressed_clean)
        return loss
Example #2
    def __call__(self, noise, clean):
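        # Audio-only forward pass: estimate a single mask from the compressed
        # noisy spectrogram, apply it to the mixture, and compute the loss
        # against the compressed clean spectrogram.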

        noise = xp.asarray(noise).astype(xp.float32)
        clean = xp.asarray(clean).astype(xp.float32)

        compressed_noise, _ = op.compress_audio(noise)
        compressed_clean, _ = op.compress_audio(clean)

        mask, _ = self.estimate_mask(spec=compressed_noise)

        separated = op.mul(mask, compressed_noise)

        loss = evaluate_loss(self, separated, compressed_clean)

        return loss
Example #3
def predict(model):
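    # Run the model on a batch of evaluation files: estimate masks (with or
    # without face input, depending on env.INPUT_FACE), apply them to the
    # noisy spectrogram, report the loss, reconstruct the complex spectra,
    # and write the resulting waveforms to RESULT_DIR.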

    print("estimate mask...")
    if env.INPUT_FACE == 0:
        noise, clean1 = dataset.load_dataset_audio([
            env.TRAIN + 7, env.TRAIN + 1, env.TRAIN + 8, env.TRAIN + 3,
            env.TRAIN + 4, env.TRAIN + 5, env.TRAIN + 6
        ])
        compressed_noise, _ = op.compress_audio(noise)

        mask1, mask2 = model.estimate_mask(spec=compressed_noise)
    elif env.INPUT_FACE == 1:
        noise, clean1, face1 = dataset.load_dataset_single([
            env.TRAIN, env.TRAIN + 1, env.TRAIN + 2, env.TRAIN + 3,
            env.TRAIN + 4, env.TRAIN + 5, env.TRAIN + 6
        ])
        compressed_noise, _ = op.compress_audio(noise)

        mask1, mask2 = model.estimate_mask(spec=compressed_noise, face=face1)
    else:
        noise, clean1, clean2, face1, face2 = dataset.load_dataset_double([
            env.TRAIN, env.TRAIN + 1, env.TRAIN + 2, env.TRAIN + 3,
            env.TRAIN + 4, env.TRAIN + 5, env.TRAIN + 6
        ])
        compressed_noise, _ = op.compress_audio(noise)
        mask1, mask2 = model.estimate_mask(spec=compressed_noise,
                                           face1=face1,
                                           face2=face2)

    print("mul mask...")
    compressed_separated1 = op.mul(mask1, compressed_noise)
    compressed_separated2 = op.mul(mask2, compressed_noise)

    compressed_clean1, _ = op.compress_audio(clean1)
    loss = evaluate_loss(model, compressed_separated1, compressed_clean1)
    print(loss)

    print("reconstruct audio...")
    n = op.reconstruct_audio_complex(chainer.cuda.to_cpu(compressed_noise))
    c1 = op.reconstruct_audio_complex(chainer.cuda.to_cpu(compressed_clean1))
    # c2 = op.reconstruct_audio_complex(chainer.cuda.to_cpu(compressed_clean2))
    y1 = op.reconstruct_audio_complex(
        chainer.cuda.to_cpu(compressed_separated1.data))
    y2 = op.reconstruct_audio_complex(
        chainer.cuda.to_cpu(compressed_separated2.data))

    print("save files...")
    for i in range(n.shape[2]):

        print("{0}/{1}".format(i + 1, n.shape[2]))

        util.istft_and_save(
            "{}/{}-synthesis.wav".format(os.environ['RESULT_DIR'], i),
            n[:, :, i])
        util.istft_and_save(
            "{}/{}-clean1.wav".format(os.environ['RESULT_DIR'], i),
            c1[:, :, i])
        # util.istft_and_save("{}/{}-clean2.wav".format(os.environ['RESULT_DIR'], i), c2[:, :, i])
        util.istft_and_save(
            "{}/{}-separated1.wav".format(os.environ['RESULT_DIR'], i),
            y1[:, :, i])
        util.istft_and_save(
            "{}/{}-separated2.wav".format(os.environ['RESULT_DIR'], i),
            y2[:, :, i])