예제 #1
0
 def step(self, data):
     """Run the model on one batch and return the resulting loss.

     Args:
         data: Mapping with an ``'input'`` entry (fed to the model) and a
             ``'target'`` entry (cast to ``long`` before the loss).

     Returns:
         Whatever ``self.loss_function`` returns for the flattened
         reconstruction and target, given ``mu``, ``logvar`` and
         ``self.beta``.
     """
     inputs = cuda_variable(data['input'])
     targets = cuda_variable(data['target']).long()
     reconstruction, mu, logvar = self(inputs)

     # Collapse every non-batch axis so that the reconstruction and the
     # target are both handed to the loss as (batch, -1) views.
     n_batch = inputs.shape[0]
     return self.loss_function(
         reconstruction.view(n_batch, -1),
         targets.view(n_batch, -1),
         mu,
         logvar,
         self.beta,
     )
def plot_tsne_latent(model, dataloader, savepath):
    """Embed sampled latent codes with a 3-component t-SNE and plot them.

    Batches are encoded until the batch index exceeds 20 (so at most 22
    batches are consumed). The sampled codes are embedded with
    ``TSNE(n_components=3)``, grouped by label, drawn as a 3-D scatter plot
    and saved to ``{savepath}/tsne.pdf``.

    Args:
        model: Object exposing ``eval()``, ``encode(x)`` returning
            ``(mu, logvar)`` and ``reparameterize(mu, logvar)``.
        dataloader: Iterable of mappings with ``'input'`` and ``'label'``
            entries. Labels are used as dictionary keys.
            NOTE(review): if ``data['label']`` is a tensor, its elements
            hash by identity and every point would get its own group --
            confirm labels are plain hashable values (e.g. strings).
        savepath: Existing directory in which ``tsne.pdf`` is written.

    Returns:
        dict mapping each label to the list of its embedded 3-D points.
    """
    # Forward pass
    model.eval()
    latents = []
    labels = []
    for batch_counter, data in enumerate(dataloader):
        x_cuda = cuda_variable(data['input'])
        # Get z
        mu, logvar = model.encode(x_cuda)
        z = model.reparameterize(mu, logvar)
        latents += list(z.cpu().detach().numpy())
        labels += data['label']
        if batch_counter > 20:
            break
    z_embedded = TSNE(n_components=3).fit_transform(latents)

    # Group the embedded points by label (first-seen order is preserved).
    label_to_points = {}
    for index, label in enumerate(labels):
        label_to_points.setdefault(label, []).append(z_embedded[index])

    # Plot
    plt.clf()
    fig = plt.figure(figsize=(15, 15))
    ax = fig.add_subplot(111, projection='3d')
    colormap = mpl.cm.Set1.colors
    for colorind, (label, points) in enumerate(label_to_points.items()):
        points_np = np.asarray(points)
        # Wrap around the palette so that having more labels than colours
        # reuses colours instead of raising IndexError.
        color = colormap[colorind % len(colormap)]
        ax.scatter(points_np[:, 0],
                   points_np[:, 1],
                   points_np[:, 2],
                   color=color,
                   marker='o',
                   label=label)
    ax.legend()
    # Save before showing: with an interactive backend the figure can be
    # torn down once the window is closed, leaving savefig with a blank
    # canvas.
    plt.savefig(f'{savepath}/tsne.pdf')
    plt.show()
    plt.close('all')
    return label_to_points
def plot_interpolations(model, hparams, dataloader, savepath,
                        num_interpolated_points, method, custom_data):
    """Interpolate between latent codes, then plot and optionally render audio.

    Start/end latent codes come either from the first batch of
    ``dataloader`` (consecutive items are paired, so a batch of ``B`` items
    gives ``B - 1`` interpolations) or from ``custom_data``. Each
    interpolated code is decoded; the decoded images are laid out on a grid
    saved to ``{savepath}/spectro.pdf``. When ``hparams`` is given, every
    decoded image is additionally inverted to audio and written to
    ``{savepath}/{example}_{step}.wav``.

    Args:
        model: Object exposing ``eval``, ``encode``, ``reparameterize`` and
            ``decode``.
        hparams: Audio hyper-parameters (``sr``, ``n_fft``,
            ``win_length_samples``, ``hop_length_samples``,
            ``ref_level_db``, ``power``), or ``None`` to skip audio.
        dataloader: Iterable of mappings with an ``'input'`` entry; only
            read when ``custom_data`` is ``None``.
        savepath: Existing output directory.
        num_interpolated_points: Number of interpolation steps in [0, 1],
            both ends included.
        method: ``'linear'`` or ``'constant_radius'``.
        custom_data: ``None`` or a mapping with ``'start_data'`` and
            ``'end_data'`` entries convertible by ``torch.tensor``.

    Returns:
        dict with ``'audios'`` (array of shape
        ``(examples, steps, samples)`` or ``None`` without ``hparams``),
        ``'spectros'`` (array of shape ``(examples, channels, h, w, steps)``)
        and ``'dims'`` (``(h, w)``).

    Raises:
        ValueError: If ``custom_data`` is ``None`` and ``dataloader`` yields
            no batch.
        NotImplementedError: If ``method`` is not a supported name.
    """
    # Forward pass
    model.eval()
    if custom_data is None:
        # Only the first batch is used.
        data = next(iter(dataloader), None)
        if data is None:
            raise ValueError(
                'dataloader yielded no batch to interpolate from')
        x_cuda = cuda_variable(data['input'])
        # Get z
        mu, logvar = model.encode(x_cuda)
        z = model.reparameterize(mu, logvar)
        # Arbitrarily choose start and end points as batch_ind and batch_ind + 1
        start_z = z[:-1]
        end_z = z[1:]
        batch_dim, rgb_dim, h_dim, w_dim = x_cuda.shape
        num_examples = batch_dim - 1
    else:
        x_cuda = cuda_variable(torch.tensor(custom_data['start_data']))
        # Get z
        mu, logvar = model.encode(x_cuda)
        start_z = model.reparameterize(mu, logvar)
        x_cuda = cuda_variable(torch.tensor(custom_data['end_data']))
        # Get z
        mu, logvar = model.encode(x_cuda)
        end_z = model.reparameterize(mu, logvar)
        batch_dim, rgb_dim, h_dim, w_dim = x_cuda.shape
        num_examples = batch_dim

    x_interpolation = np.zeros(
        (num_examples, rgb_dim, h_dim, w_dim, num_interpolated_points))

    interpolation_steps = np.linspace(start=0,
                                      stop=1,
                                      num=num_interpolated_points)
    for ind_interp, t in enumerate(interpolation_steps):
        # Perform interp
        if method == 'linear':
            this_z = start_z * (1 - t) + end_z * t
        elif method == 'constant_radius':
            this_z = constant_radius_interpolation(start_z, end_z, t)
        else:
            raise NotImplementedError
        # Decode z
        x_recon = model.decode(this_z).cpu().detach().numpy()
        x_interpolation[..., ind_interp] = x_recon

    # Plot
    dims = h_dim, w_dim
    plt.clf()
    # squeeze=False keeps `axes` two-dimensional even when there is a single
    # example or a single interpolation step, so axes[row, col] always works.
    fig, axes = plt.subplots(nrows=num_examples,
                             ncols=num_interpolated_points,
                             squeeze=False)
    for ind_example in range(num_examples):
        for ind_interp in range(num_interpolated_points):
            # show the image
            image = x_interpolation[ind_example, :, :, :, ind_interp]
            axes[ind_example, ind_interp].matshow(image.reshape(dims),
                                                  origin="lower")
    for ax in fig.get_axes():
        ax.set_xticks([])
        ax.set_yticks([])
    plt.savefig(f'{savepath}/spectro.pdf')
    plt.close('all')

    # audio
    audios = None
    if hparams is not None:
        mel_basis = build_mel_basis(hparams, hparams.sr, hparams.sr)
        mel_inversion_basis = build_mel_inversion_basis(mel_basis)
        for ind_example in range(num_examples):
            for ind_interp in range(num_interpolated_points):
                audio = inv_spectrogram_sp(
                    x_interpolation[ind_example, 0, :, :, ind_interp],
                    n_fft=hparams.n_fft,
                    win_length=hparams.win_length_samples,
                    hop_length=hparams.hop_length_samples,
                    ref_level_db=hparams.ref_level_db,
                    power=hparams.power,
                    mel_inversion_basis=mel_inversion_basis)
                if audios is None:
                    # Allocated lazily: the clip length is only known once
                    # the first spectrogram has been inverted.
                    audios = np.zeros(
                        (num_examples, num_interpolated_points, len(audio)))
                audios[ind_example, ind_interp] = audio
                sf.write(f'{savepath}/{ind_example}_{ind_interp}.wav',
                         audio,
                         samplerate=hparams.sr)
    return {'audios': audios, 'spectros': x_interpolation, 'dims': dims}
예제 #4
0
 def generate(self, batch_dim):
     """Decode ``batch_dim`` latent vectors drawn from the prior.

     Args:
         batch_dim: Number of latent vectors to sample.

     Returns:
         The output of ``self.decode`` for the sampled latent batch of
         shape ``(batch_dim, self.n_z)``.
     """
     # The prior is a standard normal: torch.randn samples with mean 0 and
     # variance 1.
     latent = torch.randn(batch_dim, self.n_z)
     return self.decode(cuda_variable(latent))
def _load_chunked_audio(path, hparams):
    """Run ``get_chunks`` on ``path`` and bundle its four results in a dict."""
    waveform, chunks, start_samples, end_samples = get_chunks(
        path=path, hparams=hparams)
    return {
        'path': path,
        'waveform': waveform,
        'chunks': chunks,
        'start_samples': start_samples,
        'end_samples': end_samples,
    }


def main(config_path, loading_epoch, source_path, contamination_path,
         contamination_parameters, method):
    """Blend randomly chosen source chunks with chunks of another recording.

    Each chunk of the source file is selected with probability
    ``contamination_parameters['p_contamination']``. A selected chunk is
    paired with a random chunk of the contamination file and a random
    degree ``t`` in [0, 1); both chunks are encoded, their latent codes are
    interpolated at ``t``, and the decoded result is inverted to audio and
    written over the chunk's sample range in the source waveform. The
    result is saved as ``{model.model_dir}/plots/contaminations/contamination.wav``.

    Args:
        config_path: Configuration passed to ``get_model_and_dataset``.
        loading_epoch: Epoch passed to ``get_model_and_dataset``.
        source_path: Path of the recording to contaminate.
        contamination_path: Path of the recording providing contaminating
            chunks.
        contamination_parameters: Mapping with a ``'p_contamination'`` entry.
        method: ``'linear'`` or ``'constant_radius'``.

    Raises:
        NotImplementedError: If ``method`` is not a supported name.
    """
    # Fail fast on an unknown method; otherwise the latent codes would
    # silently stay at zero and a meaningless decode would be written out.
    if method not in ('linear', 'constant_radius'):
        raise NotImplementedError(f'unknown interpolation method: {method!r}')

    # load model
    model, _, _, _, hparams, _, _, _ = get_model_and_dataset(
        config=config_path, loading_epoch=loading_epoch)
    # Inference only: switch to evaluation mode, as the plotting helpers do.
    model.eval()

    # set savepath
    savepath = f'{model.model_dir}/plots/contaminations'
    os.makedirs(savepath, exist_ok=True)
    output_file = f'{savepath}/contamination.wav'

    # load files
    source = _load_chunked_audio(source_path, hparams)
    contamination = _load_chunked_audio(contamination_path, hparams)

    # Choose which samples to contaminate and by which degree
    contamination_indices = []
    contamination_degrees = []
    xs = []
    ys = []
    p_contamination = contamination_parameters['p_contamination']
    for index, chunk in enumerate(source['chunks']):
        if random.random() < p_contamination:
            contamination_indices.append(index)
            contamination_degrees.append(random.random())
            xs.append(chunk)
            # choose (randomly?) a contaminating syllable
            ys.append(random.choice(contamination['chunks']))

    # The waveform is edited in place below.
    out_wave = source['waveform']
    if not contamination_indices:
        # Nothing was selected: np.stack would fail on an empty list, so
        # write the untouched waveform and stop.
        sf.write(output_file, out_wave, samplerate=hparams.sr)
        return

    xs_cuda = cuda_variable(torch.tensor(np.stack(xs)))
    ys_cuda = cuda_variable(torch.tensor(np.stack(ys)))

    # Encode
    mu, logvar = model.encode(xs_cuda)
    x_z = model.reparameterize(mu, logvar)
    mu, logvar = model.encode(ys_cuda)
    y_z = model.reparameterize(mu, logvar)
    z_out = torch.zeros_like(x_z)

    # Contaminate (each item has its own interpolation degree)
    for batch_ind, t in enumerate(contamination_degrees):
        if method == 'linear':
            z_out[batch_ind] = x_z[batch_ind] * (1 - t) + y_z[batch_ind] * t
        else:  # 'constant_radius', validated above
            z_out[batch_ind] = constant_radius_interpolation(
                x_z[batch_ind], y_z[batch_ind], t)
    # Decode z
    x_recon = model.decode(z_out).cpu().detach().numpy()

    # Replace contaminated samples in original wave
    mel_basis = build_mel_basis(hparams, hparams.sr, hparams.sr)
    mel_inversion_basis = build_mel_inversion_basis(mel_basis)
    for batch_index, contamination_index in enumerate(contamination_indices):
        new_chunk = x_recon[batch_index, 0]
        new_audio = inv_spectrogram_sp(new_chunk,
                                       n_fft=hparams.n_fft,
                                       win_length=hparams.win_length_samples,
                                       hop_length=hparams.hop_length_samples,
                                       ref_level_db=hparams.ref_level_db,
                                       power=hparams.power,
                                       mel_inversion_basis=mel_inversion_basis)
        start_sample = source['start_samples'][contamination_index]
        end_sample = source['end_samples'][contamination_index]
        length_sample = end_sample - start_sample
        # TODO: apply a fade at the chunk boundaries here
        out_wave[start_sample:end_sample] = new_audio[:length_sample]
    sf.write(output_file, out_wave, samplerate=hparams.sr)
def plot_reconstruction(model, hparams, dataloader, savepath, custom_data):
    """Reconstruct a batch, plot originals against reconstructions, export audio.

    The batch is either the first one yielded by ``dataloader`` or
    ``custom_data['all_data']``. Originals (top row) and reconstructions
    (bottom row) are drawn on a grid saved to ``{savepath}/spectro.pdf``.
    When ``hparams`` is given, channel 0 of every original and
    reconstruction is inverted to audio and written to
    ``{savepath}/{i}_original.wav`` and ``{savepath}/{i}_recon.wav``.

    Args:
        model: Object exposing ``eval()`` and ``reconstruct(x)``.
        hparams: Audio hyper-parameters (``sr``, ``n_fft``,
            ``win_length_samples``, ``hop_length_samples``,
            ``ref_level_db``, ``power``), or ``None`` to skip audio.
        dataloader: Iterable of mappings with an ``'input'`` entry; only
            read when ``custom_data`` is ``None``.
        savepath: Existing output directory.
        custom_data: ``None`` or a mapping whose ``'all_data'`` entry is
            convertible by ``torch.tensor`` and supports NumPy-style
            indexing (``x[i, 0]``) and ``reshape``.

    Returns:
        dict with ``'original_audios'`` and ``'reconstruction_audios'``
        (lists, empty without ``hparams``), ``'original_spectros'`` and
        ``'reconstruction_spectros'``.

    Raises:
        ValueError: If ``custom_data`` is ``None`` and ``dataloader`` yields
            no batch.
    """
    # Forward pass
    model.eval()
    if custom_data is None:
        # Only the first batch is used.
        data = next(iter(dataloader), None)
        if data is None:
            raise ValueError('dataloader yielded no batch to reconstruct')
        x_orig = data['input'].numpy()
        x_cuda = cuda_variable(data['input'])
    else:
        x_orig = custom_data['all_data']
        x_cuda = cuda_variable(torch.tensor(custom_data['all_data']))
    x_recon = model.reconstruct(x_cuda).cpu().detach().numpy()

    # Plot
    dims = x_recon.shape[2:]
    num_examples = x_recon.shape[0]
    plt.clf()
    # squeeze=False keeps `axes` two-dimensional even for a single example,
    # so axes[row, col] indexing always works.
    fig, axes = plt.subplots(nrows=2, ncols=num_examples, squeeze=False)
    for i in range(num_examples):
        # show the image
        axes[0, i].matshow(x_orig[i].reshape(dims), origin="lower")
        axes[1, i].matshow(x_recon[i].reshape(dims), origin="lower")
    for ax in fig.get_axes():
        ax.set_xticks([])
        ax.set_yticks([])
    plt.savefig(f'{savepath}/spectro.pdf')
    plt.close('all')

    # audio
    original_audios = []
    reconstruction_audios = []
    if hparams is not None:
        mel_basis = build_mel_basis(hparams, hparams.sr, hparams.sr)
        mel_inversion_basis = build_mel_inversion_basis(mel_basis)

        def spectro_to_audio(spectro):
            # Same inversion settings for originals and reconstructions.
            return inv_spectrogram_sp(
                spectro,
                n_fft=hparams.n_fft,
                win_length=hparams.win_length_samples,
                hop_length=hparams.hop_length_samples,
                ref_level_db=hparams.ref_level_db,
                power=hparams.power,
                mel_inversion_basis=mel_inversion_basis)

        for i in range(num_examples):
            original_audio = spectro_to_audio(x_orig[i, 0])
            recon_audio = spectro_to_audio(x_recon[i, 0])

            sf.write(f'{savepath}/{i}_original.wav',
                     original_audio,
                     samplerate=hparams.sr)
            sf.write(f'{savepath}/{i}_recon.wav',
                     recon_audio,
                     samplerate=hparams.sr)

            original_audios.append(original_audio)
            reconstruction_audios.append(recon_audio)
    return {
        'original_audios': original_audios,
        'reconstruction_audios': reconstruction_audios,
        'original_spectros': x_orig,
        'reconstruction_spectros': x_recon
    }