def __call__(self, noise, clean1, clean2, face1, face2):
    """Return the training loss for the two-source separation model.

    Every input is converted to a float32 ``xp`` array. The two face inputs
    additionally gain a trailing axis, and the two clean references are
    joined along axis 3 before compression.

    Args:
        noise: Mixture input passed to ``op.compress_audio``.
        clean1: Reference signal for the first source.
        clean2: Reference signal for the second source.
        face1: Face input for the first source (indexed with three axes,
            so it is expected to be 3-D before the new axis is appended).
        face2: Face input for the second source (same layout as ``face1``).

    Returns:
        The value produced by ``evaluate_loss`` for the separated output
        against the compressed clean reference.
    """
    def _as_float32(value):
        # Single conversion path shared by all five inputs.
        return xp.asarray(value).astype(xp.float32)

    mixture = _as_float32(noise)
    reference_a = _as_float32(clean1)
    reference_b = _as_float32(clean2)
    face_a = _as_float32(face1)[:, :, :, xp.newaxis]
    face_b = _as_float32(face2)[:, :, :, xp.newaxis]

    reference = xp.concatenate((reference_a, reference_b), axis=3)

    # Only the first element returned by compress_audio is used here.
    mixture_spec, _ = op.compress_audio(mixture)
    reference_spec, _ = op.compress_audio(reference)

    mask_a, mask_b = self.estimate_mask(
        spec=mixture_spec, face1=face_a, face2=face_b
    )

    # Both masks are applied to the same compressed mixture, then stacked
    # along axis 3 to line up with the concatenated reference.
    # NOTE(review): original annotated this result as (6, 2, 301, 514) —
    # presumably the shape for one particular batch; confirm.
    estimate = F.concat(
        (op.mul(mask_a, mixture_spec), op.mul(mask_b, mixture_spec)),
        axis=3,
    )

    return evaluate_loss(self, estimate, reference_spec)
def get_batch_mnist(ds, index, repeat, image_size=112):
    """Assemble a mini-batch from ``ds`` and tile it ``repeat`` times.

    Args:
        ds: Indexable dataset exposing ``num_target``. ``ds[i]`` must return
            a sequence whose element 0 is the image and element 1 the target.
        index: Array of dataset indices; ``index.shape[0]`` is the batch size.
        repeat: Number of copies of the batch stacked along the first axis.
        image_size: Height and width of each single-channel image.

    Returns:
        Tuple ``(x, t)`` of float32 ``xp`` arrays with shapes
        ``(repeat * batch, 1, image_size, image_size)`` and
        ``(repeat * batch, ds.num_target)``.
    """
    batch_size = index.shape[0]

    # Allocate directly as float32 instead of staging in float64 and casting.
    images = np.empty(
        (batch_size, 1, image_size, image_size), dtype=np.float32
    )
    targets = np.zeros((batch_size, ds.num_target), dtype=np.float32)

    for row, sample_id in enumerate(index):
        # Fetch each sample exactly once: dataset access may be expensive,
        # and a single fetch guarantees image and target belong together
        # even if the dataset applies random transforms on access.
        sample = ds[sample_id]
        images[row] = sample[0]
        targets[row] = sample[1]

    return_x = xp.asarray(xp.tile(images, (repeat, 1, 1, 1)))
    return_t = xp.asarray(xp.tile(targets, (repeat, 1)))
    return return_x, return_t
def __call__(self, noise, clean):
    """Return the training loss for the single-source model.

    Both inputs are converted to float32 ``xp`` arrays and compressed with
    ``op.compress_audio``. A mask estimated from the compressed mixture is
    applied to it, and the result is scored against the compressed clean
    reference.

    Args:
        noise: Mixture input.
        clean: Clean reference input.

    Returns:
        The value produced by ``evaluate_loss``.
    """
    mixture = xp.asarray(noise).astype(xp.float32)
    reference = xp.asarray(clean).astype(xp.float32)

    # Only the first element returned by compress_audio is used here.
    mixture_spec = op.compress_audio(mixture)[0]
    reference_spec = op.compress_audio(reference)[0]

    # estimate_mask yields two values; the second is ignored here.
    mask, _ = self.estimate_mask(spec=mixture_spec)
    estimate = op.mul(mask, mixture_spec)

    return evaluate_loss(self, estimate, reference_spec)