Example #1
0
    def hear(self, title=None):
        """Play this clip (or the selected slice of it) as IPython audio.

        Parameters
        ----------
        title : str, optional
            If given, printed as a label before playback.
        """
        if title is not None:
            print("Label:", title)

        if self.start is not None or self.end is not None:
            # Resolve open-ended bounds BEFORE printing: the original divided
            # self.start / self.end directly, which raised TypeError whenever
            # exactly one of them was None.
            start = 0 if self.start is None else self.start
            end = len(self.sig) - 1 if self.end is None else self.end
            print(
                f"{round(start / self.sr, 2)}s-{round(end / self.sr, 2)}s of original clip"
            )
            # NOTE(review): mixes self.sig/self.sr with self.data_signal/
            # self.sample_rate -- presumably aliases of the same clip; confirm.
            display(
                Audio(data=self.data_signal[start:end], rate=self.sample_rate))
        else:
            display(self.ipy_audio)
 def play_transformed(self):
     """Return an IPython Audio widget for the reconstructed waveform.

     Returns None (after printing a notice) when no reconstruction exists.
     """
     if self.recon is None:
         # Python 3 fix: the original used the Python 2 print statement,
         # which is a syntax error here.
         print("No reconstructed waveform")
         return None
     return Audio(self.recon, rate=self.sr)
 def play_original(self):
     """Wrap the original (untransformed) waveform in an IPython Audio widget."""
     widget = Audio(self.orig, rate=self.sr)
     return widget
Example #4
0
 def make_audio(self):
     """Make an IPython Audio object from the real part of this wave."""
     return Audio(data=self.ys.real, rate=self.frame_rate)
Example #5
0
 def ipy_audio(self):
     """Expose the raw signal as an IPython Audio widget at its native rate."""
     widget = Audio(data=self.data_signal, rate=self.sample_rate)
     return widget
Example #6
0
def audio_creator():
    """Rebuild a waveform from a rosbag audio topic, round-trip it through
    MFCCs, and write the reconstruction to 'data1mfcc.wav'.

    Returns
    -------
    numpy.ndarray
        The stacked raw samples reshaped to (n_samples // CHANNELS, CHANNELS).

    Raises
    ------
    ValueError
        If the bag contains no audio messages (the original crashed with
        ZeroDivisionError in that case).
    """
    CHUNK = 1024
    CHANNELS = 2
    RATE = 44100
    RECORD_SECONDS = 5
    WAVE_OUTPUT_FILENAME = "data1.wav"
    BAG_INPUT_FILENAME = 'data1.bag'

    # Collect every frame published on the audio topic, in bag order.
    data_store = []
    time_store = []
    for topic, msg, t in rosbag.Bag(BAG_INPUT_FILENAME).read_messages():
        if msg._type == 'hrl_anomaly_detection/audio':
            data_store.append(np.array(msg.audio_data, dtype=np.int16))
            time_store.append(t)

    if not data_store:
        raise ValueError("no audio messages found in %s" % BAG_INPUT_FILENAME)

    # Duplicate each frame so the bag's frames span RECORD_SECONDS of audio.
    # Floor division: these are frame counts and range() requires ints
    # (the original '/' produced floats under Python 3).
    baglen = len(data_store)
    num_frames = RATE // CHUNK * RECORD_SECONDS
    recovered_len = num_frames // baglen

    data_store_long = []
    for frame in data_store:
        for _ in range(recovered_len):
            data_store_long.append(frame)

    numpydata = np.hstack(data_store_long)

    # NOTE(review): loads the WAV on disk, not numpydata; librosa resamples to
    # its default sr (22050), which is why sr comes back as RATE/2 -- confirm.
    y, sr = librosa.load(WAVE_OUTPUT_FILENAME)

    # ---- Convert to MFCCs and reconstruct the waveform ----
    mfccs = librosa.feature.mfcc(y)

    print(griffinlim(mfccs))  # Python 3 print() (was a Py2 print statement)

    # Mappings to invert the MFCCs: DCT back to the mel spectrum, then mel
    # filters back to linear-frequency bins.
    n_mfcc = mfccs.shape[0]
    n_mel = 128
    dctm = librosa.filters.dct(n_mfcc, n_mel)
    n_fft = 2048
    mel_basis = librosa.filters.mel(sr, n_fft)

    # Empirical scaling of channels to get ~flat amplitude mapping.
    bin_scaling = 1.0 / np.maximum(
        0.0005, np.sum(np.dot(mel_basis.T, mel_basis), axis=0))

    # Reconstruct the approximate STFT squared-magnitude from the MFCCs.
    recon_stft = bin_scaling[:, np.newaxis] * np.dot(
        mel_basis.T, invlogamplitude(np.dot(dctm.T, mfccs)))

    # Impose the reconstructed magnitude on a white-noise STFT (random phase).
    excitation = np.random.randn(y.shape[0])
    E = librosa.stft(excitation)
    recon = librosa.istft(E / np.abs(E) * np.sqrt(recon_stft))

    # Persist the reconstruction; Audio() only renders in a notebook cell.
    wav.write('data1mfcc.wav', sr, recon)
    Audio(recon, rate=sr)

    # Floor division again: reshape dimensions must be ints under Python 3.
    return np.reshape(numpydata, (len(numpydata) // CHANNELS, CHANNELS))
Example #7
0
        outputs.append(last_output)

    sys.stdout.write('\n')

    def invlogamplitude(S):
        """Invert librosa.logamplitude (10*log10): return 10**(S/10).

        The original returned 10.0*(S/10.0), which is just S -- the identity,
        not the inverse ('**' was typo'd as '*').
        """
        return 10.0**(S/10.0)

    # Reconstruct audio:
    # https://github.com/librosa/librosa/issues/424

    # Stack the collected frames into a 2-D MFCC matrix (n_mfcc x n_frames).
    # NOTE(review): assumes each element of `outputs` is (1, n_mfcc, k) with
    # axis 1 as coefficients -- confirm against the producer loop above.
    mfccs = np.transpose(np.squeeze(np.concatenate(outputs, axis=1), 0))
    # Mappings needed to invert the MFCCs: DCT back to the mel spectrum,
    # then mel filters back to linear-frequency STFT bins.
    n_mfcc = mfccs.shape[0]
    n_mel = 128
    dctm = librosa.filters.dct(n_mfcc, n_mel)
    n_fft = 2048
    # Hard-coded sample rate (librosa's default); presumably matches the
    # rate the MFCCs were computed at -- verify.
    sr = 22050
    mel_basis = librosa.filters.mel(sr, n_fft)
    # Empirical per-bin scaling to get a roughly flat amplitude mapping.
    bin_scaling = 1.0/np.maximum(0.0005, np.sum(np.dot(mel_basis.T, mel_basis), axis=0))
    # Approximate STFT magnitude recovered from the MFCCs.
    recon_stft = bin_scaling[:, np.newaxis] * np.dot(mel_basis.T, invlogamplitude(np.dot(dctm.T, mfccs)))
    # White-noise excitation supplies random phase; 2.325 s is the assumed
    # clip length -- TODO confirm where that constant comes from.
    y_len = int(sr * 2.325)
    excitation = np.random.randn(y_len)
    E = librosa.stft(excitation)
    # Debug output: shapes must be compatible for the element-wise product below.
    print(np.shape(recon_stft))
    print(np.shape(excitation))
    print(np.shape(E))
    print(recon_stft)
    # Impose the reconstructed magnitude on the unit-magnitude noise STFT,
    # then invert back to a time-domain waveform.
    recon = librosa.istft(E/np.abs(E)*np.sqrt(recon_stft))

    # Only renders audibly when evaluated as the last expression of a notebook cell.
    Audio(recon, rate=sr)