Example #1
    def on_epoch_end(self, epoch, logs=None):
        for _ in range(self.num_tests):
          x, y = self.random_sample()
          y_p = self.model.predict(x.reshape((1,x.shape[0],1)))
          x = np.squeeze(x)
          y = np.squeeze(y)
          y_p = np.squeeze(y_p)

          if self.difference_mask:
            y = x + y
            y_p = x + y_p

          print('x/y_p diff:')
          print(abs(np.sum(x) - np.sum(y_p)))

          print('x vs predicted y')
          plt.plot(x, color='red')
          plt.plot(y_p)
          plt.show()

          print('ground truth y vs predicted y')
          plt.plot(y, color='red')
          plt.plot(y_p)
          plt.show()

          if self.audio_preview:
            print('input sample:')
            ipd.display(ipd.Audio(x, rate=self.sr, autoplay=False))
            print('ground truth:')
            ipd.display(ipd.Audio(y, rate=self.sr, autoplay=False))
            print('prediction:')
            ipd.display(ipd.Audio(y_p, rate=self.sr, autoplay=False))

        return
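Example #2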
def show_data(df, row):
    # Retrieve information from DF
    audio_data, sampling_rate, label = get_data_sample_rate_and_legend_from_df(df, row)
    
    # Print some stats and display the sound
    print(f"{label}({librosa.get_duration(audio_data, sr=sampling_rate)} sec)")
    ipd.display(ipd.Audio(audio_data, rate=sampling_rate))
    
    print("\n")
    # Make plots
    X = librosa.stft(audio_data)
    Xdb = librosa.amplitude_to_db(abs(X))

    plt.figure(figsize=(8, 16), dpi= 80, facecolor='w', edgecolor='k')

    plt.subplot(3, 1, 1)
    plt.title("Wave")
    librosa.display.waveplot(audio_data, sr=sampling_rate, x_axis="time")

    plt.subplot(3, 1, 2)
    plt.title("MEL")
    librosa.display.specshow(Xdb, sr=sampling_rate, x_axis="time", y_axis="mel")

    plt.subplot(3, 1, 3)
    plt.title("HZ")
    librosa.display.specshow(Xdb, sr=sampling_rate, x_axis="time", y_axis="hz")

    print("Audio")
    ipd.Audio(audio_data, rate = sampling_rate)
Example #3
def style_transfer_v2():
    audio_paths_ = 'data/examples_filelist_v2.txt'
    dataloader_ = TextMelLoader(audio_paths_, hparams)
    datacollate_ = TextMelCollate(1)
    ## Load data
    # for file_idx in range(10):
    #     audio_path, text, sid = dataloader_.audiopaths_and_text[file_idx]
    #     print(dict(file_idx=file_idx, audio_path=audio_path, text=text))

    file_idx = 8
    audio_path, text, sid = dataloader_.audiopaths_and_text[file_idx]
    print(dict(file_idx=file_idx, audio_path=audio_path, text=text, sid=sid))

    # get audio path, encoded text, pitch contour and mel for gst
    text_encoded = torch.LongTensor(
        text_to_sequence(text, hparams.text_cleaners,
                         arpabet_dict))[None, :].cuda()
    pitch_contour = dataloader_[file_idx][3][None].cuda()
    mel = load_mel(audio_path)

    # load source data to obtain rhythm using tacotron 2 as a forced aligner
    x, y = mellotron.parse_batch(datacollate_([dataloader_[file_idx]]))
    ipd.display(ipd.Audio(audio_path, rate=hparams.sampling_rate))

    # Style Transfer (Rhythm and Pitch Contour)
    with torch.no_grad():
        # get rhythm (alignment map) using tacotron 2
        mel_outputs, mel_outputs_postnet, gate_outputs, rhythm = mellotron.forward(
            x)
        rhythm = rhythm.permute(1, 0, 2)
    speaker_id = next(female_speakers) if np.random.randint(2) else next(
        male_speakers)
    speaker_id = torch.LongTensor([speaker_id]).cuda()

    with torch.no_grad():
        mel_outputs, mel_outputs_postnet, gate_outputs, _ = mellotron.inference_noattention(
            (text_encoded, mel, speaker_id, pitch_contour, rhythm))

    plot_mel_f0_alignment(x[2].data.cpu().numpy()[0],
                          mel_outputs_postnet.data.cpu().numpy()[0],
                          pitch_contour.data.cpu().numpy()[0, 0],
                          rhythm.data.cpu().numpy()[:, 0].T)
    plt.show()

    out_mel = mel_outputs_postnet.data.cpu().numpy()[0]
    t0 = time.time()
    # wav = aukit.inv_mel_spectrogram()
    out_wav = infer_waveform_melgan(out_mel)
    print(time.time() - t0)
    aukit.play_audio(out_wav, sr=22050)

    t0 = time.time()
    with torch.no_grad():
        audio = denoiser(waveglow.infer(mel_outputs_postnet, sigma=0.8),
                         0.01)[:, 0]
    ipd.display(ipd.Audio(audio[0].data.cpu().numpy(), rate=hparams.sampling_rate))
    out_wav = audio[0].data.cpu().numpy()
    print(time.time() - t0)
    aukit.play_audio(out_wav, sr=22050)
Example #4
def listen(signals, sr=None):
    if isinstance(signals, str):
        ipd.display(ipd.Audio(signals))
    elif isinstance(signals, np.ndarray):
        ipd.display(ipd.Audio(signals, rate=sr))
    elif isinstance(signals, list):
        for signal in signals:
            ipd.display(ipd.Audio(signal, rate=sr))
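A hypothetical usage sketch for listen(); "example.wav" is a placeholder path, and the librosa import is assumed to be available in the notebook.

import librosa

y, sr = librosa.load("example.wav", sr=None)  # one decoded signal
listen("example.wav")                         # play straight from a file path
listen(y, sr=sr)                              # play a single array
listen([y, y[::-1]], sr=sr)                   # play several arrays in sequence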
Example #5
 def play(self, with_clicks=False):
     if not with_clicks:
         return ipd.Audio(self.path, rate=self.sampling_rate)
     else:
         clicks = librosa.clicks(times=self.beat_times,
                                 sr=self.sampling_rate,
                                 length=len(self.waveform))
         audio = self.waveform + clicks
         return ipd.Audio(audio, rate=self.sampling_rate)
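A hedged sketch of the attributes play() relies on; deriving beat_times from librosa's beat tracker is an assumption about how the surrounding class fills that field.

import librosa

class Track:
    def __init__(self, path):
        self.path = path
        self.waveform, self.sampling_rate = librosa.load(path, sr=None)
        _, beat_frames = librosa.beat.beat_track(y=self.waveform, sr=self.sampling_rate)
        self.beat_times = librosa.frames_to_time(beat_frames, sr=self.sampling_rate)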
Example #6
def audio(S, hop_length=HOP_LENGTH, sr=SR):
    if len(S.shape) > 1:
        y = signal(S, hop_length)
        if y.size > 0:
            return ipd.display(ipd.Audio(y, rate=sr))
        else:
            return ipd.display(ipd.Audio(np.zeros(hop_length * 2), rate=sr))
    else:
        return ipd.display(ipd.Audio(S, rate=sr))
Example #7
def resynthesize_sources_params(W, H, midi, signal, source_activations: List[numpy.ndarray]) -> None:
    for source_index, source in enumerate(source_activations):
        channel_H = source * H
        Y = numpy.dot(W, channel_H) * signal.X_phase
        print(f'Channel {source_index}:')
        reconstructed_signal = librosa.istft(Y, length=len(signal.x))
        ipd.display(ipd.Audio(reconstructed_signal, rate=signal.sr))

        mask = numpy.dot(W, channel_H) / (numpy.dot(W, H) + numpy.finfo(float).eps)
        Y2 = mask * signal.S
        print(f'Channel {source_index} (masked):')
        reconstructed_signal2 = librosa.istft(Y2, length=len(signal.x))
        ipd.display(ipd.Audio(reconstructed_signal2, rate=signal.sr))
Example #8
def visualize_audio_data(data_x, data_y, sr=44100):
  for x, y in zip(data_x, data_y):
    print('x data:')
    plt.plot(x)
    print('y data:')
    plt.plot(y, color='red')
    plt.show()
    print('x data:')
    ipd.display(ipd.Audio(x, rate=sr, autoplay=False))
    print('y data:')
    ipd.display(ipd.Audio(y, rate=sr, autoplay=False))
    plt.show()
    print('\n')
Example #9
def resynthesize_sources_midi(W, H, midi, signal, tol_on, tol_off):
    channel_activations = initialize_activations(signal, midi, get_pitches(midi), tol_on, tol_off, by_channel=True)
    for channel in sorted(channel_activations.keys()):
        channel_H = channel_activations[channel] * H
        Y = numpy.dot(W, channel_H) * signal.X_phase
        print(f'Channel {channel}:')
        reconstructed_signal = librosa.istft(Y, length=len(signal.x))
        ipd.display(ipd.Audio(reconstructed_signal, rate=signal.sr))

        mask = numpy.dot(W, channel_H) / (numpy.dot(W, H) + numpy.finfo(float).eps)
        Y2 = mask * signal.S
        print(f'Channel {channel} (masked):')
        reconstructed_signal2 = librosa.istft(Y2, length=len(signal.x))
        ipd.display(ipd.Audio(reconstructed_signal2, rate=signal.sr))
Example #10
def nmf_display(W, H, signal, components):
    display_components(components)

    # Re-create the STFT from all NMF components.
    Y = numpy.dot(W, H) * signal.X_phase

    # Transform the STFT into the time domain.
    print('Reconstructed')
    reconstructed_signal = librosa.istft(Y, length=len(signal.x))
    ipd.display(ipd.Audio(reconstructed_signal, rate=signal.sr))
    
    print('Residual')
    residual = signal.x - reconstructed_signal
    residual[0] = 1 # hack to prevent automatic gain scaling
    ipd.display(ipd.Audio(residual, rate=signal.sr))
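A minimal sketch of how W, H, and the signal object used above might be produced; the Signal wrapper and "example.wav" are assumptions standing in for the notebook's own helpers (x, sr, S, X_phase attributes).

import numpy as np
import librosa

class Signal:
    def __init__(self, path):
        self.x, self.sr = librosa.load(path, sr=None)
        stft = librosa.stft(self.x)
        self.S = np.abs(stft)                       # magnitude spectrogram
        self.X_phase = np.exp(1j * np.angle(stft))  # unit-magnitude phase term

sig = Signal("example.wav")
W, H = librosa.decompose.decompose(sig.S, n_components=8)  # NMF: S ≈ W @ H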
Example #11
def compare_inverse(x, x_hat, res=None):
    wav = mels_to_wav(x.unsqueeze(0))
    wav_hat = mels_to_wav(x_hat.unsqueeze(0))
    plt.figure(figsize=[8, 8])
    if res is not None:
        plt.subplot(2, 1, 1)
        plt.plot(res)
        plt.grid()
    plt.subplot(2, 2, 3)
    plt.imshow(x.detach().cpu(), origin='lower', cmap='magma')
    plt.subplot(2, 2, 4)
    plt.imshow(x_hat.detach().cpu(), origin='lower', cmap='magma')
    plt.show()
    ipd.display(ipd.Audio(wav, rate=16000))
    ipd.display(ipd.Audio(wav_hat, rate=16000))
Example #12
def exercise_beating(show_result=True):
    """Exercise 1: Beating
    Notebook: PCP_signal.ipynb"""
    if show_result is False:
        return

    Fs = 100
    dur = 5
    omega_1 = 10
    omega_2 = 11
    x1, t = generate_sinusoid(dur=dur, Fs=Fs, amp=0.5, freq=omega_1)
    x2, t = generate_sinusoid(dur=dur, Fs=Fs, amp=0.5, freq=omega_2)
    title = r'Beating with $\omega_1=%.1f$ and $\omega_2=%.1f$ (beating frequency: %.1f)' % \
        (omega_1, omega_2, np.abs(omega_2-omega_1))
    plot_interference(t, x1, x2, ylim=[-1.1, 1.1], xlim=[0, dur], title=title)
    plot_interference(t,
                      x1,
                      x2,
                      ylim=[-1.1, 1.1],
                      xlim=[1, 2],
                      title=r'Zoom-in section')

    Fs = 4000
    dur = 5
    omega_1 = 200
    omega_2 = 203
    x1, t = generate_sinusoid(dur=dur, Fs=Fs, amp=0.5, freq=omega_1)
    x2, t = generate_sinusoid(dur=dur, Fs=Fs, amp=0.5, freq=omega_2)
    title = r'Beating with $\omega_1=%.1f$ and $\omega_2=%.1f$ (beating frequency: %.1f)' \
        % (omega_1, omega_2, np.abs(omega_2-omega_1))
    plot_interference(t, x1, x2, ylim=[-1.1, 1.1], xlim=[0, dur], title=title)

    ipd.display(ipd.Audio(x1 + x2, rate=Fs))
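A short numerical check of why the beating rate equals the frequency difference; it assumes generate_sinusoid() returns amp * sin(2*pi*freq*t), as the plots above suggest.

import numpy as np

Fs, dur, f1, f2 = 100, 5, 10, 11
t = np.arange(int(Fs * dur)) / Fs
x_sum = 0.5 * np.sin(2 * np.pi * f1 * t) + 0.5 * np.sin(2 * np.pi * f2 * t)
# Product form: a carrier at the mean frequency inside an envelope at half the
# difference frequency, so the amplitude maxima repeat |f2 - f1| times per second.
x_prod = np.cos(np.pi * (f2 - f1) * t) * np.sin(np.pi * (f1 + f2) * t)
print(np.allclose(x_sum, x_prod))  # True
Example #13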
def generate_images(images, source='fake', save=True):
    # make sure the training parameter is set to False because we
    # don't want to train the batchnorm layer when doing inference.
    
    if(source=='fake'):
        disp_images = images['fake_images']
    elif(source=='real'):
        disp_images = images['real_images']
    else:
        raise ValueError("source must be 'fake' or 'real'")
    plt.title(source.capitalize()+" log-magnitudes")
    for i in range(16):
        plt.subplot(4, 4, i+1)
        plt.imshow(np.transpose(disp_images[i, :, :, 0]) * 127.5, cmap="magma", origin="lower", aspect="auto")
        plt.axis('off')
    if(save):
        plt.savefig('images/image_at_{}_{}.png'.format(images['global_step'][0, 0], source))
    plt.show()
    
    plt.title(source.capitalize()+" instantaneous frequencies")
    for i in range(16):
        plt.subplot(4, 4, i+1)
        plt.imshow(np.transpose(disp_images[i, :, :, 1]) * 127.5, cmap="magma", origin="lower", aspect="auto")
        plt.axis('off')
    plt.show()
    audio = data_helper.melspecgrams_to_waves(disp_images)[:, :, 0].eval(session=tf.Session()) * 100000
    audio = audio.astype(np.float32)
    for i in range(0, 4):
        display.display(display.Audio(audio[i, :], rate=16000))
    
    
    return images['global_step'][0, 0]
Example #14
def play(a: np.ndarray,
         rate: int = 44100,
         volume: float = 0.2,
         repeat: int = 1,
         autoplay=True):
    # Tile the array `repeat` times and hand it to IPython for playback.
    # Note: `volume` is currently unused; ipd.Audio normalizes the signal itself.
    wave = np.tile(a, repeat)
    return ipd.Audio(wave, rate=rate, autoplay=autoplay)
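A hypothetical usage example: a one-second 440 Hz tone, repeated twice.

import numpy as np

rate = 44100
t = np.arange(rate) / rate
tone = 0.2 * np.sin(2 * np.pi * 440 * t)
play(tone, rate=rate, repeat=2)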
Example #15
def doStuff():
    plt.figure(figsize=(20, 80))
    offset = 40
    duration = 1
    x, sr = librosa.load(r'C:\Dev\tools\WavFiles\Kool & The Gang - Get Down On It.wav', offset=offset,duration=duration)
    print(x.shape)
    ipd.display(ipd.Audio(x, rate=sr))
    hop_length = 128
    n_fft = 4096
    D = librosa.stft(x, n_fft=n_fft, hop_length=hop_length)
    plt.subplot(3, 1, 1)
    librosa.display.specshow(librosa.amplitude_to_db(librosa.magphase(D)[0], ref=numpy.max), y_axis='log',x_axis='time')
    pitches, magnitudes = librosa.core.piptrack(y=x, sr=sr, S=D, threshold=0.1)
    plt.subplot(3, 1, 2)
    librosa.display.specshow(pitches, y_axis='linear', x_axis='time')
    print(pitches.shape)
    plt.subplot(3, 1, 3)
    librosa.display.specshow(magnitudes, y_axis='linear', x_axis='time')
    average_magnitudes = numpy.average(magnitudes, 1)
    max_avg_mag = int(max(average_magnitudes))
    step = 0.01
    bins = numpy.arange(0,0.5,step)
    hist, bin_edges = numpy.histogram(average_magnitudes, bins)
    plt.clf()
    plt.bar(bin_edges[:-1], hist, width=step)
    plt.xlim(min(bin_edges), max(bin_edges))
    mag_thresh = [index for index, val in enumerate(average_magnitudes) if val > 0.5]
    print(len(mag_thresh))
    plt.draw()
    plt.show()

    m = pairwise_distances(magnitudes, metric=dtw_metric)
Example #16
def show_wav(file_name):
    # file_name = path_train + '0\\00.wav'  # debug override of the argument
    plt.figure(figsize=(12, 4))
    data, sample_rate = librosa.load(file_name)
    _ = librosa.display.waveplot(data, sr=sample_rate)
    ipd.display(ipd.Audio(file_name))
    plt.show()
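Example #17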
def plot_graphs_for_freq(freq):
    x = np.sin(2 * np.pi * freq * n/Fs)
    
    plt.subplot(221)
    plt.xlabel('Samples')
    plt.ylabel(rf'$\sin(2 \pi \cdot {freq} \cdot n/8192)$')
    plt.title('First 50 samples of $x[n]$')
    plt.stem(n[:50], x[:50])
    
    plt.subplot(223)
    plt.xlabel('Time ($s$)')
    plt.ylabel(rf'$\sin(2 \pi \cdot {freq} \cdot n/8192)$')
    plt.title('First 50 samples of $x[n]$')
    plt.plot(t[:50], x[:50])
    
    X, w = ctfts(x, 1/Fs)
    plt.subplot(222)
    plt.title('Magnitude of $X$ versus $f$')
    plt.ylabel(r'$|X(j\Omega)|$')
    plt.xlabel('Frequency ($Hz$)')
    plt.plot(w, np.abs(X))
    
    plt.subplot(224)
    plt.title('Phase of $X$ versus $f$')
    plt.ylabel(r'$\angle X(j\Omega)$')
    plt.xlabel('Frequency ($Hz$)')
    plt.plot(w, np.angle(filter_small_values(X), deg=True))
    
    plt.tight_layout()
    plt.show()
    
    print(f'{freq}Hz Tone')
    ipd.display(ipd.Audio(x, rate=Fs))
    print()
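A hedged sketch of the globals plot_graphs_for_freq() assumes; Fs = 8192 is read off the "n/8192" in the labels above, and ctfts / filter_small_values are helpers defined elsewhere in the notebook.

import numpy as np

Fs = 8192              # sampling rate implied by the labels above
n = np.arange(Fs)      # one second of sample indices
t = n / Fs             # matching time axis in seconds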
Example #18
def fun1(audio_path):
    x, sr = librosa.load(audio_path)
    ipd.display(ipd.Audio(x, rate=sr))

    hop_length = 512 * 8
    chromagram = librosa.feature.chroma_stft(y=x, sr=sr, hop_length=hop_length)
    a = len(chromagram)
    b = len(chromagram[0])
    #print(a,b)
    result = [[0] * b for _ in range(a)]  # independent rows (avoid aliasing)

    # ret=get_wav_time(audio_path)
    #print(ret)

    cnt = 0
    for i in range(b):
        max = 0
        resul = ''
        for j in range(a):
            result[j][i] = re_map[j]
            if max < chromagram[j][i]:
                max = chromagram[j][i]
                resul = result[j][i]

                cnt = cnt + 1
                if cnt > 17:
                    print(result[j][i])
                    cnt = 0

    return resul
Example #19
def fun1(audio_path):
    x, sr = librosa.load(audio_path)
    ipd.display(ipd.Audio(x, rate=sr))

    hop_length = 512 * 8
    chromagram = librosa.feature.chroma_stft(y=x, sr=sr, hop_length=hop_length)
    a2 = len(chromagram)
    b2 = len(chromagram[0])
    print(a2, b2)
    result = [[0] * b2 for _ in range(a2)]  # independent rows (avoid aliasing)
    re = [0 for i in range(60)]
    # ret=get_wav_time(audio_path)
    #print(ret)

    cnt = 0
    for i in range(0, b2):
        max = 0
        m = 0

        for j in range(0, a2):

            result[j][i] = re_map[j]
            if max < chromagram[j][i]:
                max = chromagram[j][i]

                if cnt > 10:
                    print(j, i, re[m])
                    re[m] = re_max
                    cnt = 0
                m = m + 1
    return re
Example #20
def add_secondary(clips):
    # INITIALIZE AUDIO AND GET DURATION
    y, sr = librosa.load(SONG_PATH)
    ipd.display(ipd.Audio(y, rate=sr))
    song_length = librosa.core.get_duration(y=y, sr=sr)

    # GET TOTAL TIME LINE DURATION
    total_clip_duration = 0
    for clip in clips:
        total_clip_duration += clip.MPObj.duration

    # FIND DIFFERENCE BETWEEN TIME LINE AND SONG DURATION
    difference = song_length - total_clip_duration

    # PICK CLIPS LESS THAN DIFF
    selected_clips = []
    secondary_bin = [f for f in listdir(SECONDARY_RELATIVE_PATH) if not f.startswith('.')]
    time_left = difference
    while time_left > 14:  # THRESHOLD CHOSEN ARBITRARILY-ISH
        selected_clip = random.choice(secondary_bin)
        if int(selected_clip[len(selected_clip)-5: len(selected_clip)-4]) < time_left:
            mp_object = VideoFileClip("./secondary/" + selected_clip)
            selected_clips.append(Clip(mp_object))
            time_left = time_left - int(selected_clip[len(selected_clip)-5: len(selected_clip)-4])

    # ADD CLIPS RANDOMLY INTO ARRAY
    clips_plus_secondary = clips
    for x in selected_clips:
        clips_plus_secondary.insert(randint(0, len(clips_plus_secondary)), x)

    return clips_plus_secondary
Example #21
def remove_noise_function(file_name):
    #read audio
    audio = f'{ file_name }.wav'
    path = os.fspath(audio)

    data, sr = librosa.load(path=path, duration=5.0)

    # Remove noise
    # select section of data that is noise
    noise_len = 2  # seconds
    noise = band_limited_noise(
        min_freq=4000, max_freq=12000, samples=len(data), samplerate=sr) * 10
    noise_clip = noise[:sr * noise_len]
    # perform noise reduction

    reduced_noise = nr.reduce_noise(audio_clip=data,
                                    noise_clip=noise_clip,
                                    verbose=True)

    # Display audio

    print('after noise removal')

    ipd.display(ipd.Audio(reduced_noise, rate=sr))
    librosa.output.write_wav(f'{ file_name }.wav', reduced_noise, sr)  # requires librosa < 0.8 (librosa.output was removed in 0.8.0)
    # changing format from wav to flac
    wav_audio = AudioSegment.from_file(f"{ file_name }.wav", format="wav")
    wav_audio.export(f"{ file_name }.flac", format="flac")
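A minimal sketch of the band_limited_noise() helper this snippet assumes; the real notebook may define it differently (this version simply keeps a random-phase spectrum inside the requested band).

import numpy as np

def band_limited_noise(min_freq, max_freq, samples=1024, samplerate=1):
    freqs = np.abs(np.fft.fftfreq(samples, 1 / samplerate))
    spectrum = np.zeros(samples, dtype=complex)
    band = (freqs >= min_freq) & (freqs <= max_freq)
    spectrum[band] = np.exp(1j * np.random.uniform(0, 2 * np.pi, band.sum()))
    return np.real(np.fft.ifft(spectrum))
Example #22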
def _make_audio_grid(ds, key, samplerate, rows, cols, plot_scale):
    """Plot the waveforms and IPython objects of some samples of the argument audio dataset

  Args:
    ds: `tf.data.Dataset`. The tf.data.Dataset object to visualize.
    key: The inferred key for the dataset
    samplerate : Inferred samplerate of the dataset.
    rows: `int`, number of rows of the display grid.
    cols: `int`, number of columns of the display grid.
    plot_scale: `float`, controls the plot size of the images. Keep this
      value around 3 to get a good plot. High and low values may cause
      the labels to get overlapped.
  Returns:
    fig: Waveform figure to display. IPython objects are not returned. 
  """
    import IPython.display as ipd
    plt = lazy_imports_lib.lazy_imports.matplotlib.pyplot

    num_examples = rows * cols
    examples = list(dataset_utils.as_numpy(ds.take(num_examples)))

    fig = plt.figure(figsize=(plot_scale * cols, plot_scale * rows))
    fig.subplots_adjust(hspace=1 / plot_scale, wspace=1 / plot_scale)
    t1 = 0
    t2 = 100 * 1000

    for i, ex in enumerate(examples):
        ax = fig.add_subplot(rows, cols, i + 1)
        ax.plot(ex[key])
        audio = ex['audio']
        newaudio = audio[t1:t2]
        ipd.display(ipd.Audio(newaudio, rate=samplerate))

    plt.show()
    return fig
Example #23
def transcribe(signal, model, norm=1, chroma=1, log=1):
    if isinstance(signal, util.piece):
        signal.downsample(16000)
        signal = signal.to_chunk(20)


#     plt.figure()

#     assert not (pXs[0] - Xs[0]).any()
#     pXs = Xs[:500]
#     pYs_exp = Ys[:500]
    pYs_act = model.predict_on_batch(signal)
    #     pYs_act = pYs_act * np.arange(pYs_act.shape[1])[None,:]
    pYs_act += 1E-10
    #     compare(log = 1)
    if chroma:
        #        pYs_exp = mroll2chroma(pYs_exp)
        pYs_act = mroll2chroma(pYs_act, norm=1)
    Z1 = pYs_act.T
    if log:
        plt.pcolormesh(Z1,
                       alpha=1.0,
                       norm=mpl.colors.LogNorm(vmin=Z1.min(), vmax=Z1.max()))
    else:
        plt.pcolormesh(Z1)
    fs = np.arange(pYs_act.shape[-1])[None, :]
    pYs_act = (pYs_act) * fs
    ipd.display(ipd.Audio(midi_roll_play(pYs_act), rate=16001.))
    return pYs_act
Example #24
def plot(file_name):
    plt.figure(figsize=(12, 4))
    data, sample_rate = librosa.load(file_name)
    _ = librosa.display.waveplot(data, sr=sample_rate)
    ipd.display(ipd.Audio(file_name))
    plt.show()

    """
Example #25
    def recordAudio(self):
        RATE = 16000
        RECORD_SECONDS = 2.5
        CHUNKSIZE = 1024

        # initialize portaudio
        p = pyaudio.PyAudio()
        stream = p.open(format=pyaudio.paInt16,
                        channels=1,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNKSIZE)
        print("***Recording ***")

        frames = []
        for _ in range(0, int(RATE / CHUNKSIZE * RECORD_SECONDS)):
            data = stream.read(CHUNKSIZE)
            frames.append(np.frombuffer(data, dtype=np.int16))

        # Convert the list of numpy-arrays into a 1D array (column-wise)
        numpydata = np.hstack(frames)
        print("* done")
        # close stream
        stream.stop_stream()
        stream.close()
        p.terminate()

        ipd.display(ipd.Audio(numpydata, rate=RATE))

        dir = "testingData"
        filename = "\output.wav"
        wav.write(dir + filename, RATE, numpydata)
Example #26
def play_sequence(sequence,
                  synth=midi_synth.synthesize,
                  sample_rate=_DEFAULT_SAMPLE_RATE,
                  colab_ephemeral=True,
                  **synth_args):
    """Creates an interactive player for a synthesized note sequence.

  This function should only be called from a Jupyter or Colab notebook.

  Args:
    sequence: A music_pb2.NoteSequence to synthesize and play.
    synth: A synthesis function that takes a sequence and sample rate as input.
    sample_rate: The sample rate at which to synthesize.
    colab_ephemeral: If set to True, the widget will be ephemeral in Colab, and
      disappear on reload (and it won't be counted against realtime document
      size).
    **synth_args: Additional keyword arguments to pass to the synth function.
  """
    array_of_floats = synth(sequence, sample_rate=sample_rate, **synth_args)

    try:
        import google.colab  # pylint: disable=unused-import,unused-variable,g-import-not-at-top
        colab_play(array_of_floats, sample_rate, colab_ephemeral)
    except ImportError:
        display.display(display.Audio(array_of_floats, rate=sample_rate))
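Example #27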
 def alert(self, n):
     if n >= self.total-1:
         # keep playing the last chunk if more sounds are needed
         n = self.total-1
     data_chunk = self.wav[n*self.chunk_size:(n+1)*self.chunk_size]
     self.display.update(disp.Audio(data_chunk * self.envelope,  # * self.volume
                                    rate=self.sample_rate, autoplay=True))
Example #28
def solve(x_val, y_val, classes, model):
    model = load_model('best_model.hdf5')
    index = random.randint(0, len(x_val) - 1)
    samples = x_val[index].ravel()
    print("Audio:", classes[np.argmax(y_val[index])])
    ipd.display(ipd.Audio(samples, rate=8000))
    print("Prediction:", predict(samples, model, classes))
Example #29
def plot_fft_and_listen(filepath, raw_axis=False):
    sr = 22050
    x = load_wav(filepath)
    x_ft = np.abs(np.fft.fft(x))

    time = np.arange(len(x), dtype=float) / sr
    freq = np.arange(len(x_ft), dtype=float) / len(x_ft) * sr

    if raw_axis:
        print('sample rate:', sr)
        print('N:', len(x))

    plt.figure()
    plt.subplot(2,1,1)
    if raw_axis:
        plt.plot(x)
        plt.xlabel('n')
        plt.ylabel('$x(n)$')
    else:
        plt.plot(time, x)
        plt.xlabel('time')

    plt.subplot(2,1,2)
    if raw_axis:
        plt.plot(x_ft)
        plt.xlabel('k')
        plt.ylabel('$|X(k)|$')
        plt.xlim(0, 3000*len(x) / sr)
    else:
        plt.plot(freq, x_ft)
        plt.xlim(0, 3000)
        plt.xlabel('Frequency (Hz)')

    return ipd.Audio(x, rate=sr)
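A minimal sketch of the load_wav() helper this snippet assumes (mono audio at the 22050 Hz rate hard-coded above); the notebook's own version may differ.

import librosa

def load_wav(filepath, sr=22050):
    x, _ = librosa.load(filepath, sr=sr)
    return x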
Example #30
def generate_audio(mel, waveglow, filepath, sample_rate=22050):
    with torch.no_grad():
        audio = waveglow.infer(mel, sigma=0.666)

    audio = audio[0].data.cpu().numpy()
    audio = ipd.Audio(audio, rate=sample_rate)
    with open(filepath, "wb") as f:
        f.write(audio.data)