def ppr_to_audio(ppr, save_dir, sfpath=soundfont(), tempo=120,
                 save_npy=False, save_midi=True, convert_mp3=True):
    song_name = ppr.name
    wave_file_path = os.path.join(save_dir, f"{song_name}.wav")
    pm = ppr.to_pretty_midi(constant_tempo=tempo)
    audio = pm_to_wave(pm, wave_file_path, sfpath)
    print("wave file length:", len(audio))
    print("wave file saved to", wave_file_path)
    if save_npy:
        npy_path = os.path.join(save_dir, f'{song_name}.npy')
        np.save(npy_path, ppr)
        print(f"{song_name}.npy saved!")
    if save_midi:
        midi_path = os.path.join(save_dir, f'{song_name}.midi')
        ppr.write(midi_path)
        print(f"{song_name}.midi file saved!")
    if convert_mp3:
        sound = AS.from_wav(wave_file_path)
        mp3_file_path = f"{wave_file_path[:-4]}.mp3"
        sound.export(mp3_file_path, format="mp3")
        os.remove(wave_file_path)
        print("The wave file was replaced by", mp3_file_path, '\n')
    else:
        return Audio(wave_file_path)
    return Audio(mp3_file_path)

def get_sample(search_dir, fmt='mp3'):
    sound_paths = glob.glob(os.path.join(search_dir, f"*.{fmt}"))
    sound_paths.sort()
    if len(sound_paths) > 1:
        print(f"{len(sound_paths)} sounds found in {search_dir}")
        for i, path in enumerate(sound_paths):
            print(f"{i}: {path.split('/')[-1]}")
        idx = input("input the number of the sound (empty for the last): ") or -1
        sound_path = sound_paths[int(idx)]
    elif sound_paths:
        sound_path = sound_paths[0]
    else:
        print(f"no sound file found in {search_dir}")
        return (None, None)
    print(f"sound is loaded from {sound_path}")
    song_name = sound_path.split('/')[-1].split('.')[0]
    midi_path = os.path.join(search_dir, f'{song_name}.midi')
    if os.path.exists(midi_path):
        ppr = Multitrack(midi_path)
        print(f"midi is loaded from {midi_path}")
        return (Audio(sound_path), ppr)
    else:
        print(f"failed to load midi from {midi_path}")
        return (Audio(sound_path), None)

def demo(self, track, artist=None, album=None):
    producer_proba, top_songs, producer = self.query(track, artist, album,
                                                     use_spotify=True)
    if top_songs is None:
        print('Song mp3 not available. :-(')
        return producer_proba, top_songs, producer
    # get top song info
    query_preview_url = self.query_preview_url
    top_track = top_songs['track'].iloc[0]
    top_artist = top_songs['artist'].iloc[0]
    top_song_url = self.collection.find_one({'track': top_track})['preview_url']
    # process audio
    y_query, sr_query = load_mp3_from_url(query_preview_url)
    y_top, sr_top = load_mp3_from_url(top_song_url)
    if producer is None:
        print("True Producer: Not Found")
    else:
        print("True Producer: {}".format(producer))
    print()
    print("Producer Probabilities:")
    print(producer_proba)
    print()
    print("Query Song: {} by {}".format(track, artist))
    IPython.display.display(Audio(data=y_query, rate=sr_query))
    print()
    print("Most Similar Song: {} by {}".format(top_track, top_artist))
    IPython.display.display(Audio(data=y_top, rate=sr_top))
    return producer_proba, top_songs, producer

def show(self, ax=None, figsize=(5, 1), player=True, title=None, **kwargs):
    if ax is None:
        _, ax = plt.subplots(figsize=figsize)
    if title:
        ax.set_title("Class: " + str(title) + " \nfilename: " + str(self.fn))
    timesteps = np.arange(self.original_signal.shape[1]) / self.sample_rate
    ax.plot(timesteps, self.original_signal[0])
    if self.original_signal.size(0) > 1:  # Check if mono or stereo
        ax.plot(timesteps, self.original_signal[1])
    ax.set_xlabel('Original Signal Time (s)')
    plt.show()
    timesteps = np.arange(self.processed_signal.shape[1]) / self.sample_rate
    _, ax = plt.subplots(figsize=figsize)
    if title:
        ax.set_title("Class: " + str(title) + " \nfilename: " + str(self.fn))
    ax.plot(timesteps, self.processed_signal[0])
    if self.processed_signal.size(0) > 1:  # Check if mono or stereo
        ax.plot(timesteps, self.processed_signal[1])
    ax.set_xlabel('Processed Signal Time (s)')
    plt.show()
    if player:
        # unable to display an IPython 'Audio' player in plt axes
        display("Original signal")
        display(Audio(self.original_signal, rate=self.sample_rate))
        display("Processed signal")
        display(Audio(self.processed_signal, rate=self.sample_rate))

def audio_to_html(data_or_str, embed=False, max_audio_length=20):
    html = ''
    if not is_audio(data_or_str):
        raise ValueError(f'Unknown audio format: {data_or_str}')
    if embed:
        if is_audio_path(data_or_str):
            audio_data, sample_rate = load_audio(data_or_str,
                                                 return_sample_rate=True)
        else:
            assert is_audio_array(data_or_str)
            audio_data = data_or_str
            sample_rate = 16000  # assume a sampling rate of 16 kHz
        if (max_audio_length is not None
                and max_audio_length * sample_rate <= audio_data.shape[0]):
            html = f'(original length was {int(audio_data.shape[0] / sample_rate)}s)'
            audio_data = audio_data[:max_audio_length * sample_rate]
        html += Audio(audio_data, rate=sample_rate)._repr_html_()
    else:
        assert is_audio_path(data_or_str)
        if is_nist_sphere_file(data_or_str):
            raise ValueError(
                f'Audio at {data_or_str} is in nist/sphere format, '
                'which the ipython Audio applet cannot play. '
                'Use embed_audio=True instead.')
        path, length = cache_audio_local(data_or_str, max_audio_length)
        if path is None:
            html = Templates.warning.format(
                content=f'Audio too long to display ({length} seconds)')
        else:
            html = Audio(filename=str(path))._repr_html_()
    return html

def play_chords(chords, tempo=160, amplitude=0.1, sample_rate=44100,
                filepath=None):
    samples = amplitude * chords_to_samples(chords, tempo, sample_rate)
    if filepath:
        from scipy.io import wavfile
        # scale floats in [-1, 1] to the 16-bit PCM range before writing
        samples = (2**15 * samples).astype(np.int16)
        wavfile.write(filepath, sample_rate, samples)
        return display(Audio(filepath))
    else:
        return display(Audio(samples, rate=sample_rate))

def sound(x, rate=8000, label=''):
    from IPython.display import display, Audio, HTML
    if label == '':  # 'is' comparison with a literal replaced by '=='
        display(Audio(x, rate=rate))
    else:
        display(HTML(
            '<style> table, th, td {border: 0px; }</style> <table><tr><td>'
            + label + '</td><td>'
            + Audio(x, rate=rate)._repr_html_()[3:]
            + '</td></tr></table>'))

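# Usage sketch for sound() above (an addition, not from the original source):
# play a one-second 440 Hz sine tone at the default 8 kHz rate with a label
# shown next to the player.
import numpy as np

t = np.arange(0, 1.0, 1 / 8000)
sound(np.sin(2 * np.pi * 440 * t), label='A4 sine')
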
def play_audio(waveform, sample_rate):
    waveform = waveform.numpy()
    num_channels, num_frames = waveform.shape
    if num_channels == 1:
        display(Audio(waveform[0], rate=sample_rate))
    elif num_channels == 2:
        display(Audio((waveform[0], waveform[1]), rate=sample_rate))
    else:
        raise ValueError("Waveforms with more than 2 channels are not supported.")

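# Usage sketch for play_audio() above (an addition): torchaudio.load returns
# the (channels, frames) tensor the function expects. 'speech.wav' is a
# placeholder path.
import torchaudio

waveform, sample_rate = torchaudio.load('speech.wav')
play_audio(waveform, sample_rate)
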
def play(filename, player_id, autoplay=True, normalize=True, start=None,
         stop=None):
    if start is not None and stop is not None:
        raw_wav = raw_wav_segment(filename, start, stop)
        update_display(Audio(data=raw_wav, autoplay=autoplay,
                             normalize=normalize),
                       display_id=player_id)
    else:
        update_display(Audio(filename, autoplay=autoplay, normalize=normalize),
                       display_id=player_id)

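# Usage sketch for play() above (an addition, not from the original source):
# a shared display_id lets update_display() swap the player in place instead
# of appending a new one. 'intro.wav' is a placeholder path, and the segment
# call relies on the raw_wav_segment() helper assumed by play().
from IPython.display import display, Audio, update_display

display(Audio('intro.wav'), display_id='player-1')  # create the player once
play('intro.wav', 'player-1', start=2.0, stop=4.0)  # replace it with a segment
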
def extractPeriod(data, rate, t_start, t_end):
    t = np.arange(0, len(data)) / rate
    plt.plot(t, data)
    duration = t_end - t_start
    plt.xlabel('$t$')
    plt.ylabel('$x(t)$')
    sample_start = int(t_start * rate)
    sample_end = int(t_end * rate)
    print(rate)
    period = data[sample_start:sample_end]
    # tile the extracted period to roughly one second of audio
    audioSideBySide("Original", Audio(data=data, rate=rate),
                    "Extracted period",
                    Audio(np.tile(period, int(1 / duration)), rate=rate))
    return period, rate

def play_drum_matrix(mat, tempo=120.0, threshold=0.0):
    # generate audio
    audio_data, mididata = get_audio_from_drum_matrix(mat, tempo=tempo,
                                                      threshold=threshold)
    display(Audio(audio_data, rate=44100))
    return audio_data, mididata

def play(self):
    left_channel = self.data[:, 0]
    right_channel = self.data[:, 1]
    display(Audio([left_channel, right_channel], rate=self.rate,
                  autoplay=True))

def play(abc, title='', lyrics='', tempo=TEMPO, key=KEY,
         soundfont_path='./GeneralUser GS v1.471.sf2'):
    if not os.path.exists(soundfont_path):
        raise FileNotFoundError(f'Could not find {soundfont_path}')
    print(abc)
    header = HEADER.format(title=title, tempo=str(tempo), key=key)
    random_string = str(uuid4())
    abc_filename = random_string + '.abc'
    with open(abc_filename, 'w') as f:
        f.write('\n'.join([header, abc, lyrics]))
    # engrave the score with abcm2ps (writes Out001.svg)
    svg_filename = 'Out001.svg'
    subprocess.run(['abcm2ps', '-g', abc_filename])
    display(SVG(svg_filename))
    # convert ABC to MIDI with abc2midi
    midi_filename = random_string + '.mid'
    subprocess.run(['abc2midi', abc_filename, '-o', midi_filename])
    # render the MIDI to WAV with fluidsynth
    wav_filename = random_string + '.wav'
    subprocess.run(['fluidsynth', '-i', '-F', wav_filename, soundfont_path,
                    midi_filename])
    display(Audio(wav_filename))
    os.remove(abc_filename)
    os.remove(svg_filename)
    os.remove(midi_filename)
    os.remove(wav_filename)

def melspec_to_audio(self, mel_spectrogram, log=True, phase=None,
                     transpose=True, audio_out=True):
    if transpose:
        mel_spectrogram = mel_spectrogram.T
    if log:
        mel_spectrogram = librosa.db_to_power(mel_spectrogram)
    mel_spectrogram = mel_spectrogram**0.5  # power spectrogram -> magnitude
    # approximate the linear-frequency magnitude via the pseudo-inverse
    # of the mel filter bank
    magnitude = np.dot(np.linalg.pinv(self._MEL_FILTER), mel_spectrogram)
    if phase is not None:
        inverted_signal = librosa.istft(magnitude * phase,
                                        hop_length=self._HOP_LENGTH)
    else:
        inverted_signal = griffin_lim(magnitude, self._N_FFT,
                                      self._HOP_LENGTH, n_iterations=10)
    if audio_out:
        return Audio(inverted_signal, rate=self._SAMPLE_RATE)
    else:
        return inverted_signal

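# Background sketch for melspec_to_audio() above (an addition): recovering a
# linear-frequency spectrogram from a mel spectrogram with a pseudo-inverse
# of the mel filter bank. The sr/n_fft/n_mels values are illustrative.
import numpy as np
import librosa

mel_filter = librosa.filters.mel(sr=22050, n_fft=1024, n_mels=80)  # (80, 513)
mel_spec = np.random.rand(80, 10)                 # stand-in mel magnitudes
linear_spec = np.linalg.pinv(mel_filter) @ mel_spec
print(linear_spec.shape)                          # (513, 10)
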
def beep(inp=1, duration=.1, n=1):
    rate = 10000
    mult = 1.6 * inp if inp else .08
    wave = np.sin(mult * np.arange(rate * duration))
    for i in range(n):
        display(Audio(wave, rate=rate, autoplay=True))
        time.sleep(duration / .1)

def generate_samples_for_spkr_list(
        spkr_id_list,
        npz='',
        text='',
        checkpoint='checkpoints/vctk-16khz-cmu-no-boundaries-all-noise-2/bestmodel.pth',
        output_dir='./',
        npz_path='/home/ubuntu/loop/data/vctk-16khz-cmu-no-boundaries-all/numpy_features'):
    out = []
    for spkr in spkr_id_list:
        output_file_override = 'gen_test_' + str(spkr)
        loop_dict = generate_sample_with_loop(
            spkr_id=spkr,
            npz=npz,
            text=text,
            checkpoint=checkpoint,
            output_dir=output_dir,  # pass the parameter through (was hard-coded './')
            npz_path=npz_path,      # likewise (was a hard-coded copy of the default)
            output_file_override=output_file_override)
        out.append(loop_dict)
        IPython.display.display(
            Audio(loop_dict['output_file'] + '.wav', autoplay=True))
    return out

def play_sound(self):
    if self.fs >= 50000:
        raise ValueError('frequency too high')
    if self.domain == 'frequency':
        raise DomainError('cannot play frequency domain signals')
    # sd.play(self.__y_val, self.fs)
    # return the player so the notebook renders it; the original built the
    # Audio object but discarded it
    return Audio(data=self.__y_val, rate=self.fs, autoplay=True)

def play(self, seq, show_player=True, instrument=None):
    """
    Play sequence of chords

    :param seq: a string containing a space-separated sequence of chords
    :param show_player: whether to show the player or not
    :param instrument: user-supplied sf2 sound font (do not set it to use
        the default)
    :return: the concatenated audio samples
    """
    if instrument is None:
        # Use default sound font
        instrument = get_path_of_data_file("piano_soundfont.sf2")
    seq = str(seq)
    audio = []
    for chord in seq.split(" "):
        audio = np.append(audio, self._make_chord(chord, instrument))
    if show_player:
        display(Audio(audio, rate=self._rate, autoplay=False))
    return audio

def shift_silent_right(sample_rate, samples):
    # drop near-silent samples (|x| <= 200), then pad the removed length
    # with zeros so the silence ends up on the right
    sampling = samples[(samples > 200) | (samples < -200)]
    shifted_silent = sampling.tolist() + np.zeros(
        samples.shape[0] - sampling.shape[0]).tolist()
    # the original created the Audio object without displaying it
    display(Audio(shifted_silent, rate=sample_rate))
    return sample_rate, shifted_silent

def show_sample(melsg, file_id=None, label="", offset=0, data_dir='data',
                load_clip=False):
    fig = plt.figure(figsize=(7, 5))
    if file_id or label != "":
        fig.suptitle(' '.join([("XC%s" % file_id) if file_id else "", label]))
    gs = GridSpec(4, 1, fig, hspace=.1, wspace=0, top=.93)
    melsg_ax = fig.add_subplot(gs[0:3])
    specshow(melsg.squeeze(), y_axis='mel', x_axis='s', ax=melsg_ax)
    plt.colorbar(melsg_ax.collections[0], ax=melsg_ax, pad=.01)
    # mfcc_ax = fig.add_subplot(gs[3])
    # specshow(mfcc.squeeze(), ax=mfcc_ax, x_axis='s')
    # mfcc_ax.set_ylabel("MFCC")
    # mfcc_ax.set_yticks([0, 5, 10, 15])
    # TODO: Ensure 22050 is correct frame rate
    # mfcc_ax.set_xticklabels(["%0.1f" % (t + offset / (22050 / 512))
    #                          for t in mfcc_ax.get_xticks()])
    # plt.colorbar(mfcc_ax.collections[0], ax=mfcc_ax, aspect=7, pad=.01)
    plt.show()
    if file_id and load_clip:
        file_path = os.path.join(data_dir, 'audio', "XC%s.mp3" % file_id)
        print(file_path)
        import warnings
        warnings.simplefilter('ignore')
        data, samplerate = librosa.load(file_path)
        display(Audio(data, rate=samplerate))

def mystery_audio_demo2_play(signal_name='mystery1', centre=200, width=20):
    fmin = centre - 0.5 * width
    fmax = centre + 0.5 * width
    if fmin < 0:
        fmin = 0
    fs, x = scipy.io.wavfile.read('data/%s.wav' % signal_name)
    try:
        x = x[:, 0]  # use the first channel if the file is stereo
    except IndexError:
        pass  # already mono
    N = len(x)
    # Round up to a power of 2 for FFT efficiency
    Nz = 2 << (N - 1).bit_length()
    f = np.arange(Nz // 2 + 1) / Nz * fs / 2
    H = (f > fmin) & (f < fmax)
    X = np.fft.rfft(x, Nz)
    Y = X * H
    y = np.fft.irfft(Y)[0:N]
    y = y / y.max()
    t = np.arange(N) / fs
    signal_plot_with_dft(t, x, f, X, lollipop=False, mode='magnitude')
    display(Audio(y, rate=fs))

def load_audio_frame_raw(filename, frame, context):
    audio, sr = librosa.load(filename, sr=None, mono=False,
                             offset=frame - context, duration=1 + context)
    return Audio(audio, rate=sr, normalize=False)

def notebook_audio(audio):
    """Display IPython Audio object in the notebook

    :param audio: path to the audio file
    """
    if audio:
        display(Audio(filename=audio))

def show_audio(a: Tuple[int, np.ndarray]) -> None:
    # a: (sample_rate, audio_array)
    sample_rate, audio = a
    fig, ax = plt.subplots()
    # np.linspace requires an integer num; len() already provides one
    # (the original passed np.round(len(...)), which yields a float)
    time_axis = np.linspace(start=0, stop=len(audio) / sample_rate,
                            num=len(audio))
    ax.plot(time_axis, audio)
    ax.set_xlabel('Time (seconds)')
    ax.set_ylabel('Amplitude')
    display(Audio(audio, rate=sample_rate))

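# Usage sketch for show_audio() above (an addition): scipy.io.wavfile.read
# already returns the (sample_rate, audio_array) tuple the function expects.
# 'clip.wav' is a placeholder path to a mono WAV file.
from scipy.io import wavfile

show_audio(wavfile.read('clip.wav'))
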
def automate_trade(self):
    if self.automate:
        for indx in self.proposals.index:
            if indx not in self.transactions.index:
                if self.alert:
                    display(Audio(numpy.sin(numpy.linspace(0, 9000, 10000)),
                                  rate=200000, autoplay=True))
                self.ws.send(json.dumps(
                    {"buy": indx,
                     "price": self.proposals.loc[indx]['amount']}))
    return

def show_audio(wav, sample_rate=DEFAULT_SAMPLE_RATE, focus_points=[0.3],
               focus_windows=[1000]):
    """Show all kinds of things about this wav"""
    try:
        wav = wav.numpy()
    except AttributeError:  # not a tensor; expect a plain numpy array
        assert isinstance(wav, np.ndarray)
    wav = wav.squeeze()
    display(Audio(data=wav, rate=sample_rate))
    specplot(wav)
    _, axes = plt.subplots(nrows=1 + len(focus_points), ncols=1,
                           figsize=(15, 2 + 2 * len(focus_points)),
                           sharey=True)
    t = np.linspace(0, len(wav) // sample_rate, len(wav))
    axes[0].plot(t, wav)
    for i, (focus_point, focus_window) in enumerate(zip(focus_points,
                                                        focus_windows)):
        c = int(len(wav) * focus_point)
        idx = list(range(c - focus_window // 2, c + focus_window // 2))
        t_focused = t[idx]
        wav_focused = wav[idx]
        axes[1 + i].plot(t_focused, wav_focused)
        axes[1 + i].set_title(f"Focused on t={focus_point * len(wav) / sample_rate}")
    axes[-1].set_xlabel("Time/s")
    plt.tight_layout()
    plt.show()

def plot_td_and_fd(t, hprime, f, htildeprime, h=None, htilde=None):
    from IPython.display import display, clear_output, Audio
    # crest factor: peak amplitude over RMS
    print('Contrast: {0:.4f}'.format(
        np.max(np.abs(hprime)) / np.sqrt(np.mean(np.abs(hprime)**2))))
    sampling_rate = 1.0 / (t[1] - t[0])
    plt.close('all')
    fig, (ax1, ax2) = plt.subplots(1, 2)
    if h is not None:
        ax1.plot(t, h, label='Raw data')
        ax1.plot(t, hprime, label='Filtered data')
        ax1.legend(loc='lower left')
    else:
        ax1.plot(t, hprime)
    ax1.set_xlabel('Time (seconds)')
    ax1.set_ylabel('Detector strain $h$ (dimensionless)')
    ax1.set_xlim(right=t[-1])  # 'xmax' kwarg is deprecated in recent matplotlib
    ax1.set_ylim(1.1 * np.min(hprime), 1.1 * np.max(hprime))
    ax1.set_title('Time domain')
    ax1.grid()
    if htilde is not None:
        ax2.loglog(f, abs(htilde), label='Raw data')
        ax2.loglog(f, abs(htildeprime), label='Filtered data')
        ax2.legend(loc='lower left')
    else:
        ax2.loglog(f, abs(htildeprime))
    ax2.set_xlabel('Frequency (Hz)')
    ax2.set_ylabel(r'Detector strain Fourier transform $\tilde{h}$ (seconds)')
    ax2.set_xlim(1, sampling_rate / 2)
    ax2.set_title('Frequency domain')
    ax2.grid()
    fig.tight_layout()
    display(Audio(data=hprime, rate=int(sampling_rate), autoplay=False))
    return fig, (ax1, ax2)

def iir_notch_play(fnotch=50, alpha=0.5, bode=True):
    N = 40000
    fs = 20e3
    t = np.arange(N) / fs
    hum = 0.5 * np.cos(2 * np.pi * 50 * t)  # 50 Hz mains hum
    s = 0.2 * np.cos(2 * np.pi * 220 * t)   # 220 Hz tone to preserve
    x = hum + s
    omega0 = 2 * np.pi * fnotch / fs
    beta = np.cos(omega0)
    K = 0.5 * (1 + alpha)
    b = (K, -2 * K * beta, K)
    a = (1, -2 * K * beta, alpha)
    f = np.logspace(0, 4, 200)
    filter_plot(b, a, fs, f=f, bode=bode)
    y = signal.lfilter(b=b, a=a, x=x)
    signal_plot(t[0:2000:10], y[0:2000:10])
    show()
    display(Audio(y, rate=int(fs)))  # int for a clean integer sample rate

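# Optional check for the notch above (an addition): evaluate the same b, a
# coefficients with scipy's freqz. The magnitude should be ~0 at the notch
# frequency and near 1 well away from it.
import numpy as np
from scipy import signal

fs, fnotch, alpha = 20e3, 50, 0.5
omega0 = 2 * np.pi * fnotch / fs
K = 0.5 * (1 + alpha)
b = (K, -2 * K * np.cos(omega0), K)
a = (1, -2 * K * np.cos(omega0), alpha)
w, h = signal.freqz(b, a, worN=[omega0, np.pi / 2])
print(np.abs(h))  # first value ~0 (notch), second near 1 (passband)
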
def process_account(self, message):
    try:
        sl = message['authorize']
        if 'account_list' in sl:
            sl.pop('account_list')
        if 'scopes' in sl:
            sl.pop('scopes')
        if 'upgradeable_landing_companies' in sl:
            sl.pop('upgradeable_landing_companies')
        self.account = pandas.DataFrame(columns=[0])
        self.transaction_stream()
        for key in sl:
            self.account.loc[key] = {0: str(sl[key])}
        # for sym in self.forex_major:
        #     self.subscribe(sym)
        for sym in self.volatility_indices:
            self.subscribe(sym)
        # tr = threading.Timer(1, self.ping).start()
        threading.Thread(target=self.analysis).start()
        threading.Thread(target=self.ping).start()
        display(Audio(numpy.sin(numpy.linspace(0, 4 * 2 * numpy.pi, 25)),
                      rate=20000, autoplay=True))
    except:
        pass
    return

def generate(self, parm_var, do_postfilter=True):
    config = self.analysis_config
    for path in self.paths:
        file_id = splitext(basename(path))[0]
        print('Synthesizing %s ... ' % file_id, end='')
        mgc, lf0, vuv, bap = self._generate_parameters(path, parm_var)
        if do_postfilter:
            mgc = merlin_post_filter(mgc, config.alpha)
        sp = pysptk.mc2sp(mgc, fftlen=config.fft_length, alpha=config.alpha)
        ap = pyworld.decode_aperiodicity(bap.astype(np.float64),
                                         config.sampling_rate,
                                         config.fft_length)
        f0 = self._lf0_to_f0(lf0, vuv)
        generated = pyworld.synthesize(f0.flatten().astype(np.float64),
                                       sp.astype(np.float64),
                                       ap.astype(np.float64),
                                       config.sampling_rate,
                                       config.frame_period)
        with open(join(self.out_dir, file_id + '.wav'), 'wb') as f:
            f.write(Audio(generated, rate=config.sampling_rate).data)
        print('done!')