def inference_fast(mels, real_audio=None, i=0, epoch=0):
    generator.eval()
    audio = torch.zeros(mels.shape[0], 1, 1).to(device)
    with torch.no_grad():
        # Autoregressive generation: feed the last sample back in, one mel frame at a time.
        for length in range(1, mels.shape[2] + 1):
            res = generator(audio[:, :, -1:],
                            mels[:, :, length - 1:length],
                            fast_generation=True)
            audio = torch.cat([
                audio,
                mu_decode(torch.argmax(res[:, :, -1:], dim=1)).unsqueeze(1)
            ], dim=2)
    name = "example " + str(i) + "_no_tf"
    # torchaudio.save expects a (channels, time) tensor, not a numpy array.
    torchaudio.save("gen.wav", audio[0].detach().cpu(), sample_rate=22050)
    wandb_gen = wandb.Audio(audio[0].squeeze().detach().cpu().numpy(),
                            caption="Inference_1",
                            sample_rate=22050)
    wandb_audios = [wandb_gen]
    if real_audio is not None:
        wandb_real = wandb.Audio(
            real_audio[0].squeeze().detach().cpu().numpy(),
            caption="Real_1",
            sample_rate=22050)
        wandb_audios.append(wandb_real)
    wandb.log({name: wandb_audios}, step=epoch)
def validation_step(self, batch, batch_idx):
    # Calculate losses and results in training and inference modes
    loss, pred_mag, mixture_mag, mixture_phase, no_vocals_mag, no_vocals_phase, sr = \
        self.step(batch, batch_idx, inference=False)
    pred_waveform = self.inv_spec_transform((pred_mag, mixture_phase))
    mixed_waveform = self.inv_spec_transform((mixture_mag, mixture_phase))
    instrument_waveform = self.inv_spec_transform(
        (no_vocals_mag, no_vocals_phase))
    mixed_audio = [
        wandb.Audio(wav.detach().cpu(), sample_rate=sr[0])
        for wav in mixed_waveform
    ]
    pred_audio = [
        wandb.Audio(wav.detach().cpu(), sample_rate=sr[0])
        for wav in pred_waveform
    ]
    instrument_audio = [
        wandb.Audio(wav.detach().cpu(), sample_rate=sr[0])
        for wav in instrument_waveform
    ]
    self.logger.experiment.log(
        {
            "Mixed audio": mixed_audio,
            "Predicted audio": pred_audio,
            "True audio": instrument_audio
        },
        commit=False)
    self.log("val_loss", loss)
    return loss, pred_mag, mixture_mag, mixture_phase, no_vocals_mag, no_vocals_phase
def run_inference(text, audio=None):
    generator.eval()
    text = [ord(c) for c in text if ord(c) < 256]
    text = torch.tensor(text).view(1, -1)
    with torch.no_grad():
        text = text.to(device)
        pad_mask = (text != 0).to(device)
        res, before_prenet, stop_token, attn_matrix = generator(
            text, pad_mask, None, device)
        wandb_gen = wandb.Image(res[0, :, :].detach().cpu().numpy(),
                                caption="Generated")
        wandb_attn = wandb.Image(
            256 * attn_matrix[0, :, :].detach().cpu().numpy(),
            caption="Attention")
        wandb_images = [wandb_gen, wandb_attn]
        audio_gen = vocoder.inference(res[:1, :, :].detach().cpu())
        torchaudio.save("gen.wav", audio_gen, sample_rate=22050)
        wandb_audios = [
            wandb.Audio("gen.wav", caption="Generated", sample_rate=22050)
        ]
        if audio is not None:
            wandb_real = wandb.Image(audio[0, :, :].detach().cpu().numpy(),
                                     caption="Real")
            wandb_images.append(wandb_real)
            audio_real = vocoder.inference(audio[:1, :, :])
            torchaudio.save("temp_real.wav", audio_real, sample_rate=22050)
            wandb_audios.append(
                wandb.Audio("temp_real.wav", caption="Real", sample_rate=22050))
        wandb.log({"mels": wandb_images}, step=0)
        wandb.log({"audios": wandb_audios}, step=0)
        api.flush()
def test_audio_sample_rates():
    audio1 = np.random.uniform(-1, 1, 44100)
    audio2 = np.random.uniform(-1, 1, 88200)
    wbaudio1 = wandb.Audio(audio1, sample_rate=44100)
    wbaudio2 = wandb.Audio(audio2, sample_rate=88200)
    assert wandb.Audio.sample_rates([wbaudio1, wbaudio2]) == [44100, 88200]
    # test with missing sample rate
    with pytest.raises(ValueError):
        wandb.Audio(audio1)
def training_step(self, batch, batch_nb):
    # REQUIRED
    batch = self.mel(self.gpu(batch))
    batch = self.preprocess(batch)
    batch['mel'] = batch['mel'].permute(0, 2, 1)
    mel_outputs, mel_outputs_postnet, gate_out, alignments = self(batch)
    train_mse = self.mels_mse(mel_outputs, mel_outputs_postnet, batch)
    train_gate = self.gate_loss(gate_out, batch['mel_lengths'])
    loss = train_mse + train_gate
    losses_dict = {
        'train_loss': loss.item(),
        'train_mse': train_mse.item(),
        'train_gate_loss': train_gate.item()
    }
    if self.config.train.use_guided_attention:
        attn_loss, guide = self.guided_attention_loss(alignments)
        loss += attn_loss
        losses_dict['train_attn_loss'] = attn_loss.item()
    self.logger.experiment.log(losses_dict)
    if batch_nb % self.config.train.train_log_period == 1:
        examples = [
            wandb.Image(mel_outputs_postnet[0].detach().cpu().numpy(),
                        caption='predicted_mel'),
            wandb.Image(batch['mel'][0].detach().cpu().numpy(),
                        caption='target_mel'),
            wandb.Image(alignments[0].detach().cpu().numpy(),
                        caption='alignment')
        ]
        self.logger.experiment.log({
            'input_texts_train':
            wandb.Table(data=[
                self.text_transform.reverse(
                    batch['text'][0].detach().cpu().numpy())
            ],
                        columns=["Text"])
        })
        if self.config.train.use_guided_attention:
            examples.append(
                wandb.Image(guide.cpu().numpy(), caption='attention_guide'))
        self.logger.experiment.log({"plots_train": examples})
        examples = []
        if self.vocoder is not None:
            reconstructed_wav = self.vocoder.inference(
                mel_outputs_postnet[0].detach().permute(1, 0)[None])[0]
            examples.append(
                wandb.Audio(reconstructed_wav.detach().cpu().numpy(),
                            caption='reconstructed_wav',
                            sample_rate=self.sample_rate))
        examples.append(
            wandb.Audio(batch['audio'][0].detach().cpu().numpy(),
                        caption='target_wav',
                        sample_rate=self.sample_rate))
        self.logger.experiment.log({"audios_train": examples})
    return loss
def post_audio(self, y_orig, y_augm, rate, tag="Data augmentation example"):
    '''
    Uploads two audio samples to W&B.

    Parameters:
        y_orig (numpy.ndarray): The original track
        y_augm (numpy.ndarray): The augmented track
        rate (int): The sample rate of the audio track for correct playback speed
        tag (str): The tag under which the samples should be visible (you could also use the track name)
    '''
    wandb.log({tag: [
        wandb.Audio(y_orig, caption="Original", sample_rate=rate),
        wandb.Audio(y_augm, caption="Augmented", sample_rate=rate),
    ]})
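# A minimal usage sketch for post_audio, assuming the enclosing class is
# instantiated as `aug_logger` and the original track is loaded with librosa;
# the instance name, file path, and noise-injection "augmentation" are
# illustrative assumptions, not part of the original code.
import librosa
import numpy as np
import wandb

wandb.init(project="augmentation-demo")  # project name is illustrative

y_orig, sr = librosa.load("example.wav", sr=None)  # keep the native sample rate
y_augm = y_orig + 0.005 * np.random.randn(len(y_orig)).astype(y_orig.dtype)

aug_logger.post_audio(y_orig, y_augm, rate=sr, tag="Noise injection")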
def validation_step(self, batch, batch_idx):
    audio, audio_len = batch
    audio_mel, audio_mel_len = self.audio_to_melspec_precessor(
        audio, audio_len)
    audio_pred = self(spec=audio_mel)
    audio_pred_mel, _ = self.audio_to_melspec_precessor(
        audio_pred.squeeze(1), audio_len)
    loss_mel = F.l1_loss(audio_mel, audio_pred_mel)
    self.log("val_loss", loss_mel, prog_bar=True, sync_dist=True)
    # plot audio once per epoch
    if batch_idx == 0 and isinstance(self.logger, WandbLogger):
        clips = []
        specs = []
        for i in range(min(5, audio.shape[0])):
            clips += [
                wandb.Audio(
                    audio[i, :audio_len[i]].data.cpu().numpy(),
                    caption=f"real audio {i}",
                    sample_rate=self.sample_rate,
                ),
                wandb.Audio(
                    audio_pred[i, 0, :audio_len[i]].data.cpu().numpy().astype('float32'),
                    caption=f"generated audio {i}",
                    sample_rate=self.sample_rate,
                ),
            ]
            specs += [
                wandb.Image(
                    plot_spectrogram_to_numpy(
                        audio_mel[i, :, :audio_mel_len[i]].data.cpu().numpy()),
                    caption=f"real audio {i}",
                ),
                wandb.Image(
                    plot_spectrogram_to_numpy(
                        audio_pred_mel[i, :, :audio_mel_len[i]].data.cpu().numpy()),
                    caption=f"generated audio {i}",
                ),
            ]
        self.logger.experiment.log({
            "audio": clips,
            "specs": specs
        }, commit=False)
def on_validation_epoch_end(self):
    val_ids = [0] if self.dev_mode else [0, 1, 2]
    for idx in val_ids:
        estimation = {}
        for target_name in self.target_names:
            estimation[target_name] = get_estimation(
                idx, target_name, self.valid_estimation_dict)
            if estimation[target_name] is None:
                continue
            estimation[target_name] = estimation[target_name].astype(np.float32)
            if self.current_epoch > 10 and isinstance(self.logger, WandbLogger):
                # Log a five-second excerpt starting at the one-minute mark.
                self.logger.experiment.log({
                    'result_sample_{}_{}'.format(self.current_epoch, target_name): [
                        wandb.Audio(
                            estimation[target_name][44100 * 60:44100 * 65],
                            caption='{}_{}'.format(idx, target_name),
                            sample_rate=44100)
                    ]
                })
def add_audio(self, tag, snd_tensor, global_step=None, sample_rate=44100, caption=None):
    """Add audio data to summary.

    Args:
        tag (string): Data identifier
        snd_tensor (torch.Tensor, numpy.array, or string/blobname): Sound data
        global_step (int): Global step value to record
        sample_rate (int): sample rate in Hz
    Shape:
        snd_tensor: :math:`(1, L)`. The values should lie between [-1, 1].
    """
    # wandb.Audio expects the sample rate in Hz, so pass it through unchanged.
    wandb.log(
        {
            tag: [
                wandb.Audio(snd_tensor, caption=caption, sample_rate=sample_rate)
            ],
            'epoch': global_step
        },
        commit=True)
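# A short usage sketch for the add_audio wrapper above, assuming it is a method
# on a TensorBoard-style writer object; the `writer` instance is a hypothetical
# stand-in. A 1-D float array in [-1, 1] is passed, which wandb.Audio accepts.
import numpy as np

sr = 44100
t = np.arange(sr) / sr                # one second of sample times
tone = np.sin(2 * np.pi * 440.0 * t)  # A4 sine wave in [-1, 1]

writer.add_audio("val/tone", tone, global_step=10, sample_rate=sr, caption="A4 sine")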
def on_epoch_end(self, runner: "IRunner"):
    if (runner.epoch - 1) % 10 == 0:
        mel = torch.load(self.mel_path)
        hop_length = 256
        # pad input mel with zeros to cut artifact
        # see https://github.com/seungwonpark/melgan/issues/8
        zero = torch.full((1, 80, 10), -11.5129).to(mel.device)
        mel = torch.cat((mel, zero), dim=2)
        generator = get_nn_from_ddp_module(runner.model)["generator"]
        if torch.cuda.is_available():
            # .to() is not in-place, so the result must be reassigned
            mel = mel.to("cuda")
            mel = mel.type(torch.cuda.FloatTensor)
        audio = generator.forward(mel).detach().cpu()
        audio = audio.squeeze()  # collapse all dimensions except the time axis
        audio = audio[:-(hop_length * 10)]
        audio = MAX_WAV_VALUE * audio
        audio = audio.clamp(min=-MAX_WAV_VALUE, max=MAX_WAV_VALUE - 1)
        audio = audio.short()
        audio = audio.cpu().detach().numpy()
        try:
            import wandb
            wandb.log(
                {
                    f"generated_{runner.epoch}.wav": [
                        wandb.Audio(audio,
                                    caption=self.mel_path,
                                    sample_rate=22050)
                    ]
                },
                step=runner.epoch)
        except ImportError:
            import warnings
            warnings.warn("can't import wandb")
        out_path = self.out_name + f"_{runner.epoch}.wav"
        write(out_path, 22050, audio)
def _make_wandb_audio(frequency, caption):
    SAMPLE_RATE = 44100
    DURATION_SECONDS = 1
    data = np.sin(2 * np.pi * np.arange(SAMPLE_RATE * DURATION_SECONDS) *
                  frequency / SAMPLE_RATE)
    return wandb.Audio(data, SAMPLE_RATE, caption)
def make_wandb_audio(x, caption, fs, is_multi=False):
    if is_multi:
        # One row per channel; peak-normalize each channel independently.
        np_x = x.detach().cpu().numpy().reshape((x.shape[1], -1))
        np_x = np_x / (np.max(np.abs(np_x), axis=1)[:, None] + 1e-7)
        audio_objs = []
        for i in range(min(4, len(np_x))):
            audio_objs.append(
                wandb.Audio(np_x[i].reshape(-1),
                            caption=caption + '_chan_{}'.format(i),
                            sample_rate=fs))
    else:
        np_x = x.detach().cpu().numpy().reshape((-1))
        np_x = np_x / (np.max(np.abs(np_x)) + 1e-7)
        audio_objs = [wandb.Audio(np_x, caption=caption, sample_rate=fs)]
    return audio_objs
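# A short usage sketch for make_wandb_audio, assuming the model output x is a
# (batch, channels, time) torch tensor; the shapes and run setup below are
# illustrative assumptions.
import torch
import wandb

wandb.init(project="audio-demo")  # project name is illustrative

x = torch.randn(1, 2, 16000)  # fake stereo output: one second at 16 kHz

# is_multi=True logs up to four peak-normalized channels as separate clips.
wandb.log({"model_output": make_wandb_audio(x, "pred", fs=16000, is_multi=True)})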
def dict_to_audios(self, scope_name, audios, sample_rate):
    for key, value in audios.items():
        if value.dtype == "float16":
            value = value.astype("float32")
        try:
            self.log_dict["{}/{}".format(scope_name, key)] = wandb.Audio(
                value, sample_rate=sample_rate)
        except RuntimeError:
            traceback.print_exc()
def valid(self, step):
    self.model.eval()
    index = random.randint(0, len(self.noisy_files) - 1)
    noisy = dataloader.read_wav(self.noisy_files[index])
    write_wav(os.path.join(self.output_dir, "noisy_{}.wav".format(step)), noisy)
    wandb.log({
        "noisy": [
            wandb.Audio(os.path.join(self.output_dir,
                                     "noisy_{}.wav".format(step)),
                        caption="noisy",
                        sample_rate=self.sr)
        ]
    })
    noisy = torch.tensor([noisy], dtype=torch.float32, device=self.device)
    with torch.no_grad():
        clean, noise = self.model(noisy)
    clean = np.squeeze(clean.cpu().detach().numpy())
    write_wav(os.path.join(self.output_dir, "clean_{}.wav".format(step)),
              clean, self.sr)
    wandb.log({
        "clean": [
            wandb.Audio(os.path.join(self.output_dir,
                                     "clean_{}.wav".format(step)),
                        caption="clean",
                        sample_rate=self.sr)
        ]
    })
    noise = np.squeeze(noise.cpu().detach().numpy())
    write_wav(os.path.join(self.output_dir, "noise_{}.wav".format(step)),
              noise, self.sr)
    wandb.log({
        "noise": [
            wandb.Audio(os.path.join(self.output_dir,
                                     "noise_{}.wav".format(step)),
                        caption="noise",
                        sample_rate=self.sr)
        ]
    })
def test_audio_refs():
    audioObj = wandb.Audio(
        "https://wandb-artifacts-refs-public-test.s3-us-west-2.amazonaws.com/StarWars3.wav"
    )
    art = wandb.Artifact("audio_ref_test", "dataset")
    art.add(audioObj, "audio_ref")
    audio_expected = {
        "_type": "audio-file",
        "caption": None,
    }
    assert utils.subdict(audioObj.to_json(art), audio_expected) == audio_expected
def test_audio_transform():
    audio = np.random.uniform(-1, 1, 44100)
    with CliRunner().isolated_filesystem():
        meta = wandb.Audio.transform([wandb.Audio(audio, sample_rate=44100)],
                                     ".", "test", 0)
        assert meta == {
            '_type': 'audio',
            'count': 1,
            'sampleRates': [44100],
            'durations': [1.0]
        }
        assert os.path.exists("media/audio/test_0_0.wav")
def log_audios(self, params, logs):
    inputs = [
        self.model.get_layer(l).output
        for l in params.get('in_layers', None)
    ]
    outs = [
        self.model.get_layer(l).output
        for l in params.get('out_layers', None)
    ]
    predict_fn = tf.keras.backend.function(inputs=inputs, outputs=outs)
    test_data = params.get('test_data', None)
    log_y = params.get('log_gt', True)
    join_by = params.get('join_by', None)
    test_data.step = 0
    test_data.intraepoch_step = 0
    x, y = test_data.__getitem__(0)
    sr = params.get('sr', 16000)
    y_pred = predict_fn(x)
    n_samples = y_pred[0].shape[0]
    batch_data = test_data.data.iloc[:n_samples].reset_index()
    if join_by is not None:
        for i, g in enumerate(batch_data[join_by].unique()):
            g_data = batch_data.loc[batch_data[join_by] == g]
            max_samples = g_data.end.max()
            g_audio = np.zeros((max_samples, ))
            for logid, row in g_data.iterrows():
                duration = int(row['end']) - int(row['start'])
                # Triangular cross-fade window over each predicted segment.
                window = np.concatenate([
                    np.linspace(0, 1, duration // 2),
                    np.linspace(1, 0, duration - duration // 2)
                ])
                g_audio[int(row['start']):int(row['end'])] = (
                    g_audio[int(row['start']):int(row['end'])] +
                    window * y_pred[0][int(logid)])
            if isinstance(sr, int):
                sr_i = sr
            else:
                sr_i = int(g_data.iloc[0][sr])
            wandb.log({
                g.split('/')[-1]:
                wandb.Audio(g_audio,
                            caption=g.split('/')[-1],
                            sample_rate=sr_i)
            })
def _log_results(self, audio, decoded_targets, decoded_preds, num_examples=4):
    audios = []
    examples = list(zip(audio, decoded_targets, decoded_preds))[:num_examples]
    for (waveform, targ, pred) in examples:
        caption = f"Targ:{targ} --- Pred:{pred}\n"
        audios.append(
            wandb.Audio(waveform.cpu(), caption=caption, sample_rate=16000))
    wandb.log({"examples": audios})
def log_songs(prefix, songs, names, log_name):
    log = []
    for song, name in zip(songs, names):
        song.write_midi(os.path.join(wandb.run.dir, prefix + name + ".mid"))
        midi_to_wav(os.path.join(wandb.run.dir, prefix + name + ".mid"),
                    os.path.join(wandb.run.dir, prefix + name + ".wav"))
        log.append(
            wandb.Audio(os.path.join(wandb.run.dir, prefix + name + ".wav"),
                        caption="original",
                        sample_rate=32))
    wandb.log({log_name: log})
def on_epoch_end(self, *args):
    validation_X = self.validation_data[0]
    validation_y = self.validation_data[1]
    val_scales = scales["dirty"][:20]
    validation_length = len(validation_X)
    indices = np.random.choice(validation_length, 1, replace=False)
    predictions = self.model.predict(validation_X[indices])
    print("Min: ", predictions.min(), "Max: ", predictions.max())
    predictions = predictions.clip(0, 1)
    norm_pred = []
    norm_in = []
    clean_in = []
    for i, idx in enumerate(indices):
        scale = val_scales[idx]
        pred = np.squeeze(predictions[i])
        norm = np.squeeze(validation_X[idx])
        clean = np.squeeze(validation_y[idx])
        # Reconstruct waveforms from magnitude spectrograms via Griffin-Lim.
        norm_pred.append(audio_utilities.griffin_lim(pred, scale))
        norm_in.append(audio_utilities.griffin_lim(norm, scale))
        clean_in.append(audio_utilities.griffin_lim(clean, scale))
    wandb.log(
        {
            "clean_audio": [
                wandb.Audio(audio, sample_rate=sample_rate)
                for audio in clean_in
            ],
            "noisy_audio": [
                wandb.Audio(audio, sample_rate=sample_rate)
                for audio in norm_in
            ],
            "audio": [
                wandb.Audio(audio, sample_rate=sample_rate)
                for audio in norm_pred
            ]
        },
        commit=False)
def validate(self, name):
    try:
        noisy, _ = next(self.val_iter)
    except (AttributeError, StopIteration):
        # (Re)create the iterator on first use or once it is exhausted.
        self.val_iter = iter(self.train_dataloader)
        noisy, _ = next(self.val_iter)
    audio_utils.write_wav(
        os.path.join(self.output_dir, "noisy_{}.wav".format(name)), noisy)
    wandb.log({
        "noisy": [
            wandb.Audio(os.path.join(self.output_dir,
                                     "noisy_{}.wav".format(name)),
                        caption="noisy",
                        sample_rate=16000)
        ]
    })
    with torch.no_grad():
        m = noisy.mean()
        noisy = noisy - m
        noisy = torch.tensor([noisy]).to(self.device)
        out = self.model(noisy)
    clean = np.squeeze(out.cpu().detach().numpy())
    clean = clean + m.item()  # add the mean back as a plain float
    audio_utils.write_wav(
        os.path.join(self.output_dir, "clean_{}.wav".format(name)), clean)
    wandb.log({
        "clean": [
            wandb.Audio(os.path.join(self.output_dir,
                                     "clean_{}.wav".format(name)),
                        caption="clean",
                        sample_rate=16000)
        ]
    })
    torch.save(self.model.state_dict(),
               os.path.join(self.output_dir, "{}.pth".format(name)))
def on_test_epoch_end(self):
    results = museval.EvalStore(frames_agg='median', tracks_agg='median')
    for idx in range(self.musdb_test.num_tracks):
        estimation = {}
        for target_name in self.target_names:
            estimation[target_name] = get_estimation(
                idx, target_name, self.test_estimation_dict)
            if estimation[target_name] is not None:
                estimation[target_name] = estimation[target_name].astype(np.float32)
        # Real SDR
        if len(estimation) == len(self.target_names):
            track_length = self.musdb_test.musdb_reference[idx].samples
            estimated_targets = [
                estimation[target_name][:track_length]
                for target_name in self.target_names
            ]
            if track_length > estimated_targets[0].shape[0]:
                raise NotImplementedError
            estimated_targets_dict = {
                target_name: estimation[target_name][:track_length]
                for target_name in self.target_names
            }
            track_score = museval.eval_mus_track(
                self.musdb_test.musdb_reference[idx], estimated_targets_dict)
            score_dict = track_score.df.loc[:, ['target', 'metric', 'score']] \
                .groupby(['target', 'metric'])['score'].median().to_dict()
            if isinstance(self.logger, WandbLogger):
                self.logger.experiment.log({
                    'test_result/{}_{}'.format(k1, k2): score_dict[(k1, k2)]
                    for k1, k2 in score_dict.keys()
                })
            else:
                print(track_score)
            results.add_track(track_score)
            if idx == 1 and isinstance(self.logger, WandbLogger):
                self.logger.experiment.log({
                    'result_sample_{}_{}'.format(self.current_epoch, target_name): [
                        wandb.Audio(estimation[target_name],
                                    caption='{}_{}'.format(idx, target_name),
                                    sample_rate=44100)
                    ]
                })
    if isinstance(self.logger, WandbLogger):
        result_dict = results.df.groupby(
            ['track', 'target', 'metric'])['score'].median().reset_index() \
            .groupby(['target', 'metric'])['score'].median().to_dict()
        self.logger.experiment.log({
            'test_result/agg/{}_{}'.format(k1, k2): result_dict[(k1, k2)]
            for k1, k2 in result_dict.keys()
        })
    else:
        print(results)
def validation_epoch_end(self, outputs: List[Any]) -> None:
    for idx in [0]:
        estimation = {}
        for target_name in self.target_names:
            estimation[target_name] = get_estimation(
                idx, target_name, self.valid_estimation_dict)
            if estimation[target_name] is None:
                continue
            estimation[target_name] = estimation[target_name].astype(np.float32)
            if self.current_epoch > 1 and isinstance(self.logger, WandbLogger):
                track = estimation[target_name]
                # Log at most a 20-second excerpt to keep uploads small.
                if track.shape[0] > 40 * 44100:
                    track = track[44100 * 20:44100 * 40]
                self.logger.experiment.log({
                    'result_sample_{}_{}'.format(self.current_epoch, target_name): [
                        wandb.Audio(track,
                                    caption='{}_{}'.format(idx, target_name),
                                    sample_rate=44100)
                    ]
                })
    reduced_loss = torch.stack(outputs).mean()
    self.log('val_loss', reduced_loss, prog_bar=False, logger=True,
             on_step=False, on_epoch=True, sync_dist=True)
    print(reduced_loss)
def test_audio_to_json(mocked_run):
    audio = np.zeros(44100)
    audioObj = wandb.Audio(audio, sample_rate=44100)
    audioObj.bind_to_run(mocked_run, "test", 0)
    meta = wandb.Audio.seq_to_json([audioObj], mocked_run, "test", 0)
    assert os.path.exists(os.path.join(mocked_run.dir, meta["audio"][0]["path"]))
    meta_expected = {
        "_type": "audio",
        "count": 1,
        "sampleRates": [44100],
        "durations": [1.0],
    }
    assert utils.subdict(meta, meta_expected) == meta_expected
    audio_expected = {
        "_type": "audio-file",
        "caption": None,
        "size": 88244,
    }
    assert utils.subdict(meta["audio"][0], audio_expected) == audio_expected
def test_audio_captions():
    audio = np.random.uniform(-1, 1, 44100)
    sample_rate = 44100
    caption1 = "This is what a dog sounds like"
    caption2 = "This is what a chicken sounds like"
    # test with all captions
    wbaudio1 = wandb.Audio(audio, sample_rate=sample_rate, caption=caption1)
    wbaudio2 = wandb.Audio(audio, sample_rate=sample_rate, caption=caption2)
    assert wandb.Audio.captions([wbaudio1, wbaudio2]) == [caption1, caption2]
    # test with no captions
    wbaudio3 = wandb.Audio(audio, sample_rate=sample_rate)
    wbaudio4 = wandb.Audio(audio, sample_rate=sample_rate)
    assert wandb.Audio.captions([wbaudio3, wbaudio4]) is False
    # test with some captions
    wbaudio5 = wandb.Audio(audio, sample_rate=sample_rate)
    wbaudio6 = wandb.Audio(audio, sample_rate=sample_rate, caption=caption2)
    assert wandb.Audio.captions([wbaudio5, wbaudio6]) == ["", caption2]
def test(epoch, test_dataset, model, device):
    model.eval()
    mel, wav = test_dataset[0]
    mel, wav = mel.to(device), wav.to(device)
    with torch.no_grad():
        pred = model.inference(
            mel.unsqueeze(0)).squeeze(0).detach().cpu().numpy()
    wandb.log({
        "examples": [
            wandb.Audio(pred,
                        caption="Epoch {}".format(epoch),
                        sample_rate=22050)
        ]
    })
    torch.save(model.state_dict(),
               'checkpoints/wavenet_epoch{}.pt'.format(epoch))
    return pred
def test_audio_to_json(mocked_run):
    audio = np.zeros(44100)
    audioObj = wandb.Audio(audio, sample_rate=44100)
    audioObj.bind_to_run(mocked_run, "test", 0)
    meta = wandb.Audio.seq_to_json([audioObj], mocked_run, "test", 0)
    assert os.path.exists(os.path.join(mocked_run.dir, meta['audio'][0]['path']))
    meta_expected = {
        '_type': 'audio',
        'count': 1,
        'sampleRates': [44100],
        'durations': [1.0],
    }
    assert utils.subdict(meta, meta_expected) == meta_expected
    audio_expected = {
        '_type': 'audio-file',
        'caption': None,
        'sample_rate': 44100,
        'size': 88244,
    }
    assert utils.subdict(meta['audio'][0], audio_expected) == audio_expected
def test_audio_to_json():
    audio = np.zeros(44100)
    with CliRunner().isolated_filesystem():
        run = wandb.wandb_run.Run()
        meta = wandb.Audio.seq_to_json(
            [wandb.Audio(audio, sample_rate=44100)], run, "test", 0)
        assert os.path.exists(os.path.join(run.dir, meta['audio'][0]['path']))
        meta_expected = {
            '_type': 'audio',
            'count': 1,
            'sampleRates': [44100],
            'durations': [1.0],
        }
        assert utils.subdict(meta, meta_expected) == meta_expected
        audio_expected = {
            '_type': 'audio-file',
            'caption': None,
            'sample_rate': 44100,
            'size': 88244,
        }
        assert utils.subdict(meta['audio'][0], audio_expected) == audio_expected
def get_audios(items: List[str], predictions: List[Any],
               targets: List[Any]) -> List[wandb.Audio]:
    """Returns a list of wandb.Audio objects

    :param items: list of items of all inputs
    :type items: List[str]
    :param predictions: model predictions
    :type predictions: List[Any]
    :param targets: ground truths
    :type targets: List[Any]
    """
    audios = []
    for item, prediction, target in zip(items, predictions, targets):
        audio = item.load()
        caption = 'Pred: {} GT: {}'.format(prediction, target)
        audios.append(
            wandb.Audio(audio['signal'],
                        caption=caption,
                        sample_rate=audio['rate']))
    return audios
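# A hedged usage sketch for get_audios, assuming each item exposes a load()
# method returning a dict with 'signal' and 'rate' keys, as the loop above
# expects; the AudioItem class and file names are hypothetical.
import wandb

wandb.init(project="eval-demo")  # project name is illustrative

items = [AudioItem("clip_0.wav"), AudioItem("clip_1.wav")]  # hypothetical loader
preds = ["dog", "chicken"]
gts = ["dog", "cat"]

# Each clip is captioned with its prediction and ground truth.
wandb.log({"eval_samples": get_audios(items, preds, gts)})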
vid4 = _make_video()


def _make_wandb_audio(frequency, caption):
    SAMPLE_RATE = 44100
    DURATION_SECONDS = 1
    data = np.sin(2 * np.pi * np.arange(SAMPLE_RATE * DURATION_SECONDS) *
                  frequency / SAMPLE_RATE)
    return wandb.Audio(data, SAMPLE_RATE, caption)


aud1 = _make_wandb_audio(440, "four forty")
aud_ref_https = wandb.Audio(
    "https://wandb-artifacts-refs-public-test.s3-us-west-2.amazonaws.com/StarWars3.wav",
    caption="star wars https")
aud_ref_s3 = wandb.Audio("s3://wandb-artifacts-refs-public-test/StarWars3.wav",
                         caption="star wars s3")
aud_ref_gs = wandb.Audio("gs://wandb-artifact-refs-public-test/StarWars3.wav",
                         caption="star wars gs")

np_data = np.random.randint(255, size=(4, 16, 16, 3))


def _make_wandb_table():
    classes = wandb.Classes([
        {
            "id": 1,
            "name": "tree"
        },