Example #1
def inference_fast(mels, real_audio=None, i=0, epoch=0):
    generator.eval()
    val_losses = []
    audio = torch.zeros(mels.shape[0], 1, 1).to(device)
    with torch.no_grad():
        for length in range(1, mels.shape[2] + 1):
            res = generator(audio[:, :, -1:],
                            mels[:, :, length - 1:length],
                            fast_generation=True)
            audio = torch.cat([
                audio,
                mu_decode(torch.argmax(res[:, :, -1:], dim=1)).unsqueeze(1)
            ],
                              dim=2)
        name = "example " + str(i) + "_no_tf"
        torchaudio.save("gen.wav",
                        audio[0].squeeze().detach().cpu().numpy(),
                        sample_rate=22050)
        wandb_gen = wandb.Audio(audio[0].squeeze().detach().cpu().numpy(),
                                caption="Inference_1",
                                sample_rate=22050)
        wandb_audios = [wandb_gen]
        if real_audio is not None:
            wandb_real = wandb.Audio(
                real_audio[0].squeeze().detach().cpu().numpy(),
                caption="Real_1",
                sample_rate=22050)
            wandb_audios.append(wandb_real)
        wandb.log({name: wandb_audios}, step=epoch)
Example #2
    def validation_step(self, batch, batch_idx):
        # Compute the loss and magnitude/phase spectrograms (training-mode forward pass)
        loss, pred_mag, mixture_mag, mixture_phase, no_vocals_mag, no_vocals_phase, sr = \
            self.step(batch, batch_idx, inference=False)
        pred_waveform = self.inv_spec_transform((pred_mag, mixture_phase))
        mixed_waveform = self.inv_spec_transform((mixture_mag, mixture_phase))
        instrument_waveform = self.inv_spec_transform(
            (no_vocals_mag, no_vocals_phase))

        mixed_audio = [
            wandb.Audio(wav.detach().cpu(), sample_rate=sr[0])
            for wav in mixed_waveform
        ]
        pred_audio = [
            wandb.Audio(wav.detach().cpu(), sample_rate=sr[0])
            for wav in pred_waveform
        ]
        instrument_audio = [
            wandb.Audio(wav.detach().cpu(), sample_rate=sr[0])
            for wav in instrument_waveform
        ]

        self.logger.experiment.log(
            {
                "Mixed audio": mixed_audio,
                "Predicted audio": pred_audio,
                "True audio": instrument_audio
            },
            commit=False)
        self.log("val_loss", loss)

        return loss, pred_mag, mixture_mag, mixture_phase, no_vocals_mag, no_vocals_phase
Example #3
def run_inference(text, audio=None):
    generator.eval()
    text = [ord(c) for c in text if ord(c) < 256]
    text = torch.tensor(text).view(1, -1)
    with torch.no_grad():
        text = text.to(device)
        pad_mask = (text != 0).to(device)
        res, before_prenet, stop_token, attn_matrix = generator(
            text, pad_mask, None, device)
        wandb_gen = wandb.Image(res[0, :, :].detach().cpu().numpy(),
                                caption="Generated")
        wandb_attn = wandb.Image(256 *
                                 attn_matrix[0, :, :].detach().cpu().numpy(),
                                 caption="Attention")
        wandb_images = [wandb_gen, wandb_attn]
        audio_gen = vocoder.inference(res[:1, :, :].detach().cpu())
        torchaudio.save("gen.wav", audio_gen, sample_rate=22050)
        wandb_audios = [
            wandb.Audio("gen.wav", caption="Generated", sample_rate=22050)
        ]
        if audio is not None:
            wandb_real = wandb.Image(audio[0, :, :].detach().cpu().numpy(),
                                     caption="Real")
            wandb_images.append(wandb_real)
            audio_real = vocoder.inference(audio[:1, :, :])
            torchaudio.save("temp_real.wav", audio_real, sample_rate=22050)
            wandb_audios.append(
                wandb.Audio("temp_real.wav", caption="Real",
                            sample_rate=22050))
        wandb.log({"mels": wandb_images}, step=0)
        wandb.log({"audios": wandb_audios}, step=0)
        api.flush()
Example #4
def test_audio_sample_rates():
    audio1 = np.random.uniform(-1, 1, 44100)
    audio2 = np.random.uniform(-1, 1, 88200)
    wbaudio1 = wandb.Audio(audio1, sample_rate=44100)
    wbaudio2 = wandb.Audio(audio2, sample_rate=88200)
    assert wandb.Audio.sample_rates([wbaudio1, wbaudio2]) == [44100, 88200]
    # test with missing sample rate
    with pytest.raises(ValueError):
        wandb.Audio(audio1)
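# Hedged aside (not from any of the repos above): a minimal sketch of the core
# pattern these examples share. wandb.Audio needs an explicit sample_rate when
# given a raw array; the project name and tone parameters are illustrative only.
import numpy as np
import wandb

run = wandb.init(project="audio-logging-demo")  # hypothetical project
sr = 16000
tone = np.sin(2 * np.pi * 440 * np.arange(sr) / sr)  # 1 s of a 440 Hz sine
wandb.log({"tone": [wandb.Audio(tone, caption="440 Hz sine", sample_rate=sr)]})
run.finish()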
Example #5
 def training_step(self, batch, batch_nb):
     # REQUIRED
     batch = self.mel(self.gpu(batch))
     batch = self.preprocess(batch)
     batch['mel'] = batch['mel'].permute(0, 2, 1)
     mel_outputs, mel_outputs_postnet, gate_out, alignments = self(batch)
     train_mse = self.mels_mse(mel_outputs, mel_outputs_postnet, batch)
     train_gate = self.gate_loss(gate_out, batch['mel_lengths'])
     loss = train_mse + train_gate
     losses_dict = {
         'train_loss': loss.item(),
         'train_mse': train_mse.item(),
         'train_gate_loss': train_gate.item()
     }
     if self.config.train.use_guided_attention:
         attn_loss, guide = self.guided_attention_loss(alignments)
         loss += attn_loss
         losses_dict['train_attn_loss'] = attn_loss.item()
     self.logger.experiment.log(losses_dict)
     if batch_nb % self.config.train.train_log_period == 1:
         examples = [
             wandb.Image(mel_outputs_postnet[0].detach().cpu().numpy(),
                         caption='predicted_mel'),
             wandb.Image(batch['mel'][0].detach().cpu().numpy(),
                         caption='target_mel'),
             wandb.Image(alignments[0].detach().cpu().numpy(),
                         caption='alignment')
         ]
         self.logger.experiment.log({
             'input_texts_train':
             wandb.Table(data=[
                 self.text_transform.reverse(
                     batch['text'][0].detach().cpu().numpy())
             ],
                         columns=["Text"])
         })
         if self.config.train.use_guided_attention:
             examples.append(
                 wandb.Image(guide.cpu().numpy(),
                             caption='attention_guide'))
         self.logger.experiment.log({"plots_train": examples})
         examples = []
         if self.vocoder is not None:
             reconstructed_wav = self.vocoder.inference(
                 mel_outputs_postnet[0].detach().permute(1, 0)[None])[0]
             examples.append(
                 wandb.Audio(reconstructed_wav.detach().cpu().numpy(),
                             caption='reconstructed_wav',
                             sample_rate=self.sample_rate))
             examples.append(
                 wandb.Audio(batch['audio'][0].detach().cpu().numpy(),
                             caption='target_wav',
                             sample_rate=self.sample_rate))
         self.logger.experiment.log({"audios_train": examples})
     return loss
Example #6
    def post_audio(self, y_orig, y_augm, rate, tag="Data augmentation example"):
        '''
        Uploads two audio samples to W&B.

                Parameters:
                        y_orig (numpy.ndarray): The original track
                        y_augm (numpy.ndarray): The augmented track
                        rate (int): The sample rate of the audio track for correct playback speed
                        tag (str): The tag under which the samples should be visible (you could also use track name)
        '''
        wandb.log({tag: [
            wandb.Audio(y_orig, caption="Original", sample_rate=rate),
            wandb.Audio(y_augm, caption="Augmented", sample_rate=rate),
        ]})
Example #7
    def validation_step(self, batch, batch_idx):
        audio, audio_len = batch
        audio_mel, audio_mel_len = self.audio_to_melspec_precessor(
            audio, audio_len)
        audio_pred = self(spec=audio_mel)

        audio_pred_mel, _ = self.audio_to_melspec_precessor(
            audio_pred.squeeze(1), audio_len)
        loss_mel = F.l1_loss(audio_mel, audio_pred_mel)

        self.log("val_loss", loss_mel, prog_bar=True, sync_dist=True)

        # plot audio once per epoch
        if batch_idx == 0 and isinstance(self.logger, WandbLogger):
            clips = []
            specs = []
            for i in range(min(5, audio.shape[0])):
                clips += [
                    wandb.Audio(
                        audio[i, :audio_len[i]].data.cpu().numpy(),
                        caption=f"real audio {i}",
                        sample_rate=self.sample_rate,
                    ),
                    wandb.Audio(
                        audio_pred[i,
                                   0, :audio_len[i]].data.cpu().numpy().astype(
                                       'float32'),
                        caption=f"generated audio {i}",
                        sample_rate=self.sample_rate,
                    ),
                ]
                specs += [
                    wandb.Image(
                        plot_spectrogram_to_numpy(audio_mel[
                            i, :, :audio_mel_len[i]].data.cpu().numpy()),
                        caption=f"real audio {i}",
                    ),
                    wandb.Image(
                        plot_spectrogram_to_numpy(audio_pred_mel[
                            i, :, :audio_mel_len[i]].data.cpu().numpy()),
                        caption=f"generated audio {i}",
                    ),
                ]

            self.logger.experiment.log({
                "audio": clips,
                "specs": specs
            },
                                       commit=False)
Example #8
    def on_validation_epoch_end(self):
        val_ids = [0] if self.dev_mode else [0, 1, 2]
        for idx in val_ids:
            estimation = {}
            for target_name in self.target_names:
                estimation[target_name] = get_estimation(
                    idx, target_name, self.valid_estimation_dict)
                if estimation[target_name] is None:
                    continue
                estimation[target_name] = estimation[target_name].astype(
                    np.float32)

                # after epoch 10, log a 5-second excerpt (seconds 60-65 at 44.1 kHz)
                if self.current_epoch > 10 and isinstance(
                        self.logger, WandbLogger):
                    self.logger.experiment.log({
                        'result_sample_{}_{}'.format(
                            self.current_epoch, target_name): [
                            wandb.Audio(
                                estimation[target_name][44100 * 60:44100 * 65],
                                caption='{}_{}'.format(idx, target_name),
                                sample_rate=44100)
                        ]
                    })
Example #9
    def add_audio(self,
                  tag,
                  snd_tensor,
                  global_step=None,
                  sample_rate=44100,
                  caption=None):
        """Add audio data to summary.

        Args:
            tag (string): Data identifier
            snd_tensor (torch.Tensor, numpy.array, or string/blobname): Sound data
            global_step (int): Global step value to record
            sample_rate (int): sample rate in Hz
            caption (string): Optional caption shown alongside the clip

        Shape:
            snd_tensor: :math:`(1, L)`. The values should lie between [-1, 1].
        """
        wandb.log(
            {
                tag: [
                    # wandb.Audio expects the rate in Hz, so pass it unscaled
                    wandb.Audio(snd_tensor,
                                caption=caption,
                                sample_rate=sample_rate)
                ],
                'epoch': global_step
            },
            commit=True)
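# Hedged usage sketch for the wrapper above (not part of the original snippet):
# `writer`, `denoised_waveform`, and `epoch` are assumed names for illustration.
writer.add_audio("val/denoised",
                 denoised_waveform,  # 1-D array with values in [-1, 1]
                 global_step=epoch,
                 sample_rate=22050,
                 caption="denoised sample")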
Example #10
 def on_epoch_end(self, runner: "IRunner"):
     if (runner.epoch - 1) % 10 == 0:
         mel = torch.load(self.mel_path)
         hop_length = 256
         # pad input mel with zeros to cut artifact
         # see https://github.com/seungwonpark/melgan/issues/8
         zero = torch.full((1, 80, 10), -11.5129).to(mel.device)
         mel = torch.cat((mel, zero), dim=2)
         generator = get_nn_from_ddp_module(runner.model)["generator"]
         if torch.cuda.is_available():
             mel.to("cuda")
             mel = mel.type(torch.cuda.FloatTensor)
         audio = generator.forward(mel).detach().cpu()
         audio = audio.squeeze()  # collapse all dimension except time axis
         audio = audio[:-(hop_length * 10)]
         audio = MAX_WAV_VALUE * audio
         audio = audio.clamp(min=-MAX_WAV_VALUE, max=MAX_WAV_VALUE - 1)
         audio = audio.short()
         audio = audio.cpu().detach().numpy()
         try:
             import wandb
             wandb.log(
                 {
                     f"generated_{runner.epoch}.wav": [
                         wandb.Audio(audio,
                                     caption=self.mel_path,
                                     sample_rate=22050)
                     ]
                 },
                 step=runner.epoch)
          except ImportError:
              import warnings
              warnings.warn("can't import wandb")
         out_path = self.out_name + f"_{runner.epoch}.wav"
         write(out_path, 22050, audio)
Example #11
def _make_wandb_audio(frequency, caption):
    SAMPLE_RATE = 44100
    DURATION_SECONDS = 1

    data = np.sin(2 * np.pi * np.arange(SAMPLE_RATE * DURATION_SECONDS) *
                  frequency / SAMPLE_RATE)
    return wandb.Audio(data, SAMPLE_RATE, caption)
Example #12
def make_wandb_audio(x, caption, fs, is_multi=False):
    if is_multi:
        np_x = x.detach().cpu().numpy().reshape((x.shape[1], -1))
        np_x = np_x / (np.max(np.abs(np_x), axis=1)[:,None] + 1e-7)

        audio_objs = []
        for i in range(min(4, len(np_x))):
            audio_objs.append(wandb.Audio(np_x[i].reshape(-1),
                                            caption=caption + '_chan_{}'.format(i),
                                            sample_rate=fs))

    else:
        np_x = x.detach().cpu().numpy().reshape((-1))
        np_x = np_x / (np.max(np.abs(np_x)) + 1e-7)
        audio_objs = [wandb.Audio(np_x, caption=caption, sample_rate=fs)]

    return audio_objs
Example #13
 def dict_to_audios(self, scope_name, audios, sample_rate):
     for key, value in audios.items():
         if value.dtype == "float16":
             value = value.astype("float32")
         try:
             self.log_dict["{}/{}".format(scope_name, key)] = wandb.Audio(
                 value, sample_rate=sample_rate)
         except RuntimeError:
             traceback.print_exc()
Example #14
    def valid(self, step):
        self.model.eval()
        index = random.randint(0, len(self.noisy_files)-1)
        noisy = dataloader.read_wav(self.noisy_files[index])
        write_wav(os.path.join(self.output_dir, "noisy_{}.wav".format(step)), noisy)

        wandb.log({"noisy": [wandb.Audio(os.path.join(self.output_dir, "noisy_{}.wav".format(step)), caption="noisy", sample_rate=self.sr)]})

        noisy = torch.tensor([noisy], dtype=torch.float32, device=self.device)
        with torch.no_grad():
            clean, noise = self.model(noisy)

        clean = np.squeeze(clean.cpu().detach().numpy())
        write_wav(os.path.join(self.output_dir, "clean_{}.wav".format(step)), clean, self.sr)
        wandb.log({"clean": [wandb.Audio(os.path.join(self.output_dir, "clean_{}.wav".format(step)), caption="clean", sample_rate=self.sr)]})
        
        noise = np.squeeze(noise.cpu().detach().numpy())
        write_wav(os.path.join(self.output_dir, "noise_{}.wav".format(step)), noise, self.sr)
        wandb.log({"noise": [wandb.Audio(os.path.join(self.output_dir, "noise_{}.wav".format(step)), caption="noise", sample_rate=self.sr)]})
Example #15
def test_audio_refs():
    audioObj = wandb.Audio(
        "https://wandb-artifacts-refs-public-test.s3-us-west-2.amazonaws.com/StarWars3.wav"
    )
    art = wandb.Artifact("audio_ref_test", "dataset")
    art.add(audioObj, "audio_ref")

    audio_expected = {
        "_type": "audio-file",
        "caption": None,
    }
    assert utils.subdict(audioObj.to_json(art), audio_expected) == audio_expected
Example #16
def test_audio_transform():
    audio = np.random.uniform(-1, 1, 44100)
    with CliRunner().isolated_filesystem():
        meta = wandb.Audio.transform([wandb.Audio(audio, sample_rate=44100)],
                                     ".", "test", 0)
        assert meta == {
            '_type': 'audio',
            'count': 1,
            'sampleRates': [44100],
            'durations': [1.0]
        }
        assert os.path.exists("media/audio/test_0_0.wav")
Example #17
    def log_audios(self, params, logs):
        inputs = [
            self.model.get_layer(l).output
            for l in params.get('in_layers', None)
        ]
        outs = [
            self.model.get_layer(l).output
            for l in params.get('out_layers', None)
        ]

        predict_fn = tf.keras.backend.function(inputs=inputs, outputs=outs)

        test_data = params.get('test_data', None)
        log_y = params.get('log_gt', True)
        join_by = params.get('join_by', None)

        test_data.step = 0
        test_data.intraepoch_step = 0

        x, y = test_data.__getitem__(0)
        sr = params.get('sr', 16000)

        y_pred = predict_fn(x)
        n_samples = y_pred[0].shape[0]
        batch_data = test_data.data.iloc[:n_samples].reset_index()

        if join_by is not None:
            for i, g in enumerate(batch_data[join_by].unique()):
                g_data = batch_data.loc[batch_data[join_by] == g]
                max_samples = g_data.end.max()
                g_audio = np.zeros((max_samples, ))
                for logid, row in g_data.iterrows():
                    duration = int(row['end']) - int(row['start'])
                    window = np.concatenate([
                        np.linspace(0, 1, duration // 2),
                        np.linspace(1, 0, duration - duration // 2)
                    ])
                    g_audio[int(row['start']):int(row['end'])] = g_audio[
                        int(row['start']
                            ):int(row['end'])] + window * y_pred[0][int(logid)]
                if isinstance(sr, int):
                    sr_i = sr
                else:
                    sr_i = int(g_data.iloc[0][sr])

                wandb.log({
                    g.split('/')[-1]:
                    wandb.Audio(g_audio,
                                caption=g.split('/')[-1],
                                sample_rate=sr_i)
                })
Example #18
 def _log_results(self,
                  audio,
                  decoded_targets,
                  decoded_preds,
                  num_examples=4):
     audios = []
     examples = list(zip(audio, decoded_targets,
                         decoded_preds))[:num_examples]
     for (waveform, targ, pred) in examples:
         caption = f"Targ:{targ} --- Pred:{pred}\n"
         audios.append(
             wandb.Audio(waveform.cpu(), caption=caption,
                         sample_rate=16000))
     wandb.log({"examples": audios})
Example #19
    def log_songs(prefix, songs, names, log_name):
        log = []
        for song, name in zip(songs, names):
            song.write_midi(os.path.join(wandb.run.dir,
                                         prefix + name + ".mid"))
            midi_to_wav(os.path.join(wandb.run.dir, prefix + name + ".mid"),
                        os.path.join(wandb.run.dir, prefix + name + ".wav"))
            log.append(
                wandb.Audio(os.path.join(wandb.run.dir,
                                         prefix + name + ".wav"),
                            caption="original",
                            sample_rate=32))

        wandb.log({log_name: log})
Example #20
    def on_epoch_end(self, *args):
        validation_X = self.validation_data[0]
        validation_y = self.validation_data[1]
        val_scales = scales["dirty"][:20]
        validation_length = len(validation_X)
        indices = np.random.choice(validation_length, 1, replace=False)
        predictions = self.model.predict(validation_X[indices])
        print("Min: ", predictions.min(), "Max: ", predictions.max())
        predictions = predictions.clip(0, 1)  # np.max(abs(predictions))
        norm_pred = []
        norm_in = []
        clean_in = []
        for i, idx in enumerate(indices):
            scale = val_scales[idx]
            pred = np.squeeze(predictions[i])
            norm = np.squeeze(validation_X[idx])
            clean = np.squeeze(validation_y[idx])
            norm_pred.append(audio_utilities.griffin_lim(pred, scale))
            norm_in.append(audio_utilities.griffin_lim(norm, scale))
            clean_in.append(audio_utilities.griffin_lim(clean, scale))

        wandb.log(
            {
                "clean_audio": [
                    wandb.Audio(audio, sample_rate=sample_rate)
                    for audio in clean_in
                ],
                "noisy_audio": [
                    wandb.Audio(audio, sample_rate=sample_rate)
                    for audio in norm_in
                ],
                "audio": [
                    wandb.Audio(audio, sample_rate=sample_rate)
                    for audio in norm_pred
                ]
            },
            commit=False)
Example #21
    def validate(self, name):
        try:
            noisy, _ = next(self.val_iter)
        except (AttributeError, StopIteration):  # iterator not yet created or exhausted
            self.val_iter = iter(self.train_dataloader)
            noisy, _ = next(self.val_iter)
        audio_utils.write_wav(
            os.path.join(self.output_dir, "noisy_{}.wav".format(name)), noisy)
        wandb.log({
            "noisy": [
                wandb.Audio(os.path.join(self.output_dir,
                                         "noisy_{}.wav".format(name)),
                            caption="noisy",
                            sample_rate=16000)
            ]
        })

        with torch.no_grad():
            m = noisy.mean()
            noisy = (noisy - m)
            noisy = torch.tensor([noisy]).to(self.device)
            out = self.model(noisy)

        clean = np.squeeze(out.cpu().detach().numpy())
        clean = clean + m
        audio_utils.write_wav(
            os.path.join(self.output_dir, "clean_{}.wav".format(name)), clean)
        wandb.log({
            "clean": [
                wandb.Audio(os.path.join(self.output_dir,
                                         "clean_{}.wav".format(name)),
                            caption="clean",
                            sample_rate=16000)
            ]
        })
        torch.save(self.model.state_dict(),
                   os.path.join(self.output_dir, "{}.pth".format(name)))
Example #22
    def on_test_epoch_end(self):

        results = museval.EvalStore(frames_agg='median', tracks_agg='median')

        for idx in range(self.musdb_test.num_tracks):
            estimation = {}
            for target_name in self.target_names:
                estimation[target_name] = get_estimation(idx, target_name, self.test_estimation_dict)
                if estimation[target_name] is not None:
                    estimation[target_name] = estimation[target_name].astype(np.float32)
            # Real SDR
            if len(estimation) == len(self.target_names):
                track_length = self.musdb_test.musdb_reference[idx].samples
                estimated_targets = [estimation[target_name][:track_length] for target_name in self.target_names]
                if track_length > estimated_targets[0].shape[0]:
                    raise NotImplementedError
                else:
                    estimated_targets_dict = {target_name: estimation[target_name][:track_length] for target_name in
                                              self.target_names}
                    track_score = museval.eval_mus_track(
                        self.musdb_test.musdb_reference[idx],
                        estimated_targets_dict
                    )
                    score_dict = track_score.df.loc[:, ['target', 'metric', 'score']].groupby(
                        ['target', 'metric'])['score'] \
                        .median().to_dict()
                    if isinstance(self.logger, WandbLogger):
                        self.logger.experiment.log(
                            {'test_result/{}_{}'.format(k1, k2): score_dict[(k1, k2)] for k1, k2 in score_dict.keys()})
                    else:
                        print(track_score)
                    results.add_track(track_score)
            if idx == 1 and isinstance(self.logger, WandbLogger):
                self.logger.experiment.log({'result_sample_{}_{}'.format(self.current_epoch, target_name): [
                    wandb.Audio(estimation[target_name], caption='{}_{}'.format(idx, target_name), sample_rate=44100)]})

        if isinstance(self.logger, WandbLogger):
            result_dict = results.df.groupby(
                ['track', 'target', 'metric']
            )['score'].median().reset_index().groupby(
                ['target', 'metric']
            )['score'].median().to_dict()
            self.logger.experiment.log(
                {'test_result/agg/{}_{}'.format(k1, k2): result_dict[(k1, k2)] for k1, k2 in result_dict.keys()}
            )
        else:
            print(results)
Example #23
    def validation_epoch_end(self, outputs: List[Any]) -> None:
        for idx in [0]:
            estimation = {}
            for target_name in self.target_names:
                estimation[target_name] = get_estimation(idx, target_name, self.valid_estimation_dict)
                if estimation[target_name] is None:
                    continue
                estimation[target_name] = estimation[target_name].astype(np.float32)

                if self.current_epoch > 1 and isinstance(self.logger, WandbLogger):
                    track = estimation[target_name]
                    # log at most a 20-second excerpt to keep the upload small
                    if track.shape[0] > 40 * 44100:
                        track = track[44100 * 20:44100 * 40]

                    self.logger.experiment.log({'result_sample_{}_{}'.format(self.current_epoch, target_name): [
                        wandb.Audio(track, caption='{}_{}'.format(idx, target_name), sample_rate=44100)]})

        reduced_loss = torch.stack(outputs).mean()
        self.log('val_loss', reduced_loss, prog_bar=False, logger=True, on_step=False, on_epoch=True, sync_dist=True)
        print(reduced_loss)
Example #24
def test_audio_to_json(mocked_run):
    audio = np.zeros(44100)
    audioObj = wandb.Audio(audio, sample_rate=44100)
    audioObj.bind_to_run(mocked_run, "test", 0)
    meta = wandb.Audio.seq_to_json([audioObj], mocked_run, "test", 0)
    assert os.path.exists(os.path.join(mocked_run.dir, meta["audio"][0]["path"]))

    meta_expected = {
        "_type": "audio",
        "count": 1,
        "sampleRates": [44100],
        "durations": [1.0],
    }
    assert utils.subdict(meta, meta_expected) == meta_expected

    audio_expected = {
        "_type": "audio-file",
        "caption": None,
        "size": 88244,
    }
    assert utils.subdict(meta["audio"][0], audio_expected) == audio_expected
Example #25
def test_audio_captions():
    audio = np.random.uniform(-1, 1, 44100)
    sample_rate = 44100
    caption1 = "This is what a dog sounds like"
    caption2 = "This is what a chicken sounds like"
    # test with all captions
    wbaudio1 = wandb.Audio(audio, sample_rate=sample_rate, caption=caption1)
    wbaudio2 = wandb.Audio(audio, sample_rate=sample_rate, caption=caption2)
    assert wandb.Audio.captions([wbaudio1, wbaudio2]) == [caption1, caption2]
    # test with no captions
    wbaudio3 = wandb.Audio(audio, sample_rate=sample_rate)
    wbaudio4 = wandb.Audio(audio, sample_rate=sample_rate)
    assert wandb.Audio.captions([wbaudio3, wbaudio4]) is False
    # test with some captions
    wbaudio5 = wandb.Audio(audio, sample_rate=sample_rate)
    wbaudio6 = wandb.Audio(audio, sample_rate=sample_rate, caption=caption2)
    assert wandb.Audio.captions([wbaudio5, wbaudio6]) == ["", caption2]
Example #26
def test(epoch, test_dataset, model, device):
    model.eval()

    mel, wav = test_dataset[0]

    mel, wav = mel.to(device), wav.to(device)

    with torch.no_grad():
        pred = model.inference(
            mel.unsqueeze(0)).squeeze(0).detach().cpu().numpy()

    wandb.log({
        "examples": [
            wandb.Audio(pred,
                        caption="Epoch {}".format(epoch),
                        sample_rate=22050)
        ]
    })

    torch.save(model.state_dict(),
               'checkpoints/wavenet_epoch{}.pt'.format(epoch))

    return pred
Example #27
def test_audio_to_json(mocked_run):
    audio = np.zeros(44100)
    audioObj = wandb.Audio(audio, sample_rate=44100)
    audioObj.bind_to_run(mocked_run, "test", 0)
    meta = wandb.Audio.seq_to_json(
        [audioObj], mocked_run, "test", 0)
    assert os.path.exists(os.path.join(mocked_run.dir, meta['audio'][0]['path']))

    meta_expected = {
        '_type': 'audio',
        'count': 1,
        'sampleRates': [44100],
        'durations': [1.0],
    }
    assert utils.subdict(meta, meta_expected) == meta_expected

    audio_expected = {
        '_type': 'audio-file',
        'caption': None,
        'sample_rate': 44100,
        'size': 88244,
    }
    assert utils.subdict(meta['audio'][0], audio_expected) == audio_expected
Example #28
def test_audio_to_json():
    audio = np.zeros(44100)
    with CliRunner().isolated_filesystem():
        run = wandb.wandb_run.Run()
        meta = wandb.Audio.seq_to_json(
            [wandb.Audio(audio, sample_rate=44100)], run, "test", 0)
        assert os.path.exists(os.path.join(run.dir, meta['audio'][0]['path']))

        meta_expected = {
            '_type': 'audio',
            'count': 1,
            'sampleRates': [44100],
            'durations': [1.0],
        }
        assert utils.subdict(meta, meta_expected) == meta_expected

        audio_expected = {
            '_type': 'audio-file',
            'caption': None,
            'sample_rate': 44100,
            'size': 88244,
        }
        assert utils.subdict(meta['audio'][0], audio_expected) == audio_expected
Example #29
def get_audios(items: List[str], predictions: List[Any],
               targets: List[Any]) -> List[wandb.Audio]:
    """Returns a list of wandb.Audio objects

    :param items: list of items of all inputs
    :type items: List[str]
    :param predictions: model predictions
    :type predictions: List[Any]
    :param targets: ground truths
    :type targets: List[Any]
    """
    audios = []

    for item, prediction, target in zip(items, predictions, targets):

        audio = item.load()
        caption = 'Pred: {} GT: {}'.format(prediction, target)

        audios.append(
            wandb.Audio(audio['signal'],
                        caption=caption,
                        sample_rate=audio['rate']))

    return audios
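# Hedged usage sketch for get_audios (not from the original repo): the variable
# names below are assumptions for illustration only.
wandb.log({"eval_audios": get_audios(val_items, val_preds, val_targets)})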
Example #30
vid4 = _make_video()


def _make_wandb_audio(frequency, caption):
    SAMPLE_RATE = 44100
    DURATION_SECONDS = 1

    data = np.sin(2 * np.pi * np.arange(SAMPLE_RATE * DURATION_SECONDS) *
                  frequency / SAMPLE_RATE)
    return wandb.Audio(data, SAMPLE_RATE, caption)


aud1 = _make_wandb_audio(440, "four forty")

aud_ref_https = wandb.Audio(
    "https://wandb-artifacts-refs-public-test.s3-us-west-2.amazonaws.com/StarWars3.wav",
    caption="star wars https")
aud_ref_s3 = wandb.Audio("s3://wandb-artifacts-refs-public-test/StarWars3.wav",
                         caption="star wars s3")
aud_ref_gs = wandb.Audio("gs://wandb-artifact-refs-public-test/StarWars3.wav",
                         caption="star wars gs")

np_data = np.random.randint(255, size=(4, 16, 16, 3))


def _make_wandb_table():
    classes = wandb.Classes([
        {
            "id": 1,
            "name": "tree"
        },