Esempio n. 1
0
    def read_wav_file(self, file_name):
        """
        Load an audio file and compute its features and clipped STFT magnitude.

        Args:
            file_name: Path of the audio file, handed to ``librosa.core.load``
                together with ``sr=config.fs``.

        Returns:
            A ``(feats, voc_stft)`` tuple where ``feats`` is the result of
            ``sig_process.get_world_feats`` and ``voc_stft`` is the magnitude
            of ``utils.stft`` clipped to the range ``[0.0, 1.0]``.
        """
        # The sample rate returned by the loader is not needed afterwards.
        audio, _ = librosa.core.load(file_name, sr=config.fs)

        audio = np.float64(audio)

        if len(audio.shape) == 2:
            # Average the first two channels into a single signal.
            # NOTE(review): this indexing assumes a (samples, channels)
            # layout -- confirm against what the loader actually returns.
            vocals = np.array((audio[:, 1] + audio[:, 0]) / 2)
        else:
            vocals = np.array(audio)

        # Analyse the downmixed signal. Previously `vocals` was computed and
        # then ignored, so two-dimensional input reached the analysis
        # functions without being downmixed.
        voc_stft = abs(
            np.array(
                utils.stft(vocals,
                           hopsize=config.hopsize,
                           nfft=config.framesize,
                           fs=config.fs)))

        feats = sig_process.get_world_feats(vocals)

        voc_stft = np.clip(voc_stft, 0.0, 1.0)

        return feats, voc_stft
def get_feats(audio):
    """
    Compute the STFT of ``audio`` for use as acoustic input features.

    The transform is delegated to ``utils.stft`` using the window, hop size,
    FFT size and sample rate found in ``config``. The largest magnitude of
    the result is asserted not to exceed 1.0 before the STFT is returned.

    TODO: extend with MFCCs (number of coefficients still to be decided).
    """
    # Earlier experiments (librosa STFT, magnitude/phase split, CQT, HCQT)
    # were left here as commented-out code and are not part of the output.
    stft_settings = dict(
        window=config.window,
        hopsize=config.hopsize,
        nfft=config.nfft,
        fs=config.fs,
    )
    spectrum = utils.stft(audio, **stft_settings)

    peak_magnitude = abs(spectrum).max()
    assert peak_magnitude <= 1.0

    return spectrum
Esempio n. 3
0
    def extract_feature_wav(self, audio):
        """
        Run feature extraction on an audio signal.

        The magnitude of ``utils.stft(audio, ...)`` is computed with the hop
        size, frame size and sample rate from ``config`` and passed, together
        with ``self.sess``, to ``self.extract_feature``. The result is
        truncated along its first axis to the length of the STFT magnitude's
        first axis and returned.
        """
        spectrum = utils.stft(
            audio,
            hopsize=config.hopsize,
            nfft=config.framesize,
            fs=config.fs,
        )
        magnitude = abs(np.array(spectrum))

        predicted = self.extract_feature(magnitude, self.sess)

        # Drop anything beyond the length of the input along the first axis.
        n_rows = magnitude.shape[0]
        return predicted[:n_rows]
Esempio n. 4
0
def process_seg_yam(audio, audio_back):
    """
    Process a segment of the audio.

    Args:
        audio: Signal passed to ``sig_process.get_world_feats``,
            ``vamp_notes.extract_notes_pYIN_vamp`` and ``utils.stft``.
        audio_back: Second signal; only its STFT magnitude is computed.

    Returns:
        A four-tuple ``(out_feats, out_notes, out_stft, back_stft)`` after
        ``utils.match_time`` has been applied to all four arrays, or
        ``(None, None, None, None)`` when no notes were extracted or the
        segment has at most ``config.max_phr_len`` feature rows (checked
        both before and after ``utils.match_time``).
    """
    out_feats = sig_process.get_world_feats(audio)

    traj = vamp_notes.extract_notes_pYIN_vamp(audio)

    # Every exit path returns four values so callers can always unpack the
    # result the same way; this early exit used to return only three.
    if traj.shape[0] < 1 or len(out_feats) <= config.max_phr_len:
        return None, None, None, None

    # Sample the note trajectory on a grid up to the second field of the
    # last extracted note, in steps of config.hoptime.
    timestamps = np.arange(0, float(traj[-1][1]), config.hoptime)

    out_notes = vamp_notes.note2traj(traj, timestamps)

    out_notes_1 = sig_process.f0_to_hertz(out_notes[:, 0])

    # The conversion can yield -inf; those entries are reset to 0.
    out_notes_1[out_notes_1 == -np.inf] = 0

    out_notes[:, 0] = out_notes_1

    out_stft = abs(np.array(utils.stft(
        audio, hopsize=config.hopsize, nfft=config.framesize, fs=config.fs)))
    back_stft = abs(np.array(utils.stft(
        audio_back, hopsize=config.hopsize, nfft=config.framesize, fs=config.fs)))

    out_feats, out_notes, out_stft, back_stft = utils.match_time(
        [out_feats, out_notes, out_stft, back_stft])

    # The length is re-checked here, after utils.match_time.
    if len(out_feats) <= config.max_phr_len:
        return None, None, None, None

    assert all(out_feats[:, -2] > 0)

    assert len(out_feats) == len(out_notes)

    return out_feats, out_notes, out_stft, back_stft
    def read_wav_file(self, file_name):
        """
        Load an audio file and return its clipped STFT magnitude and features.

        The file is loaded with ``librosa.core.load`` at ``sr=config.fs`` and
        converted with ``np.float64``. Two-dimensional input is reduced to
        one signal by averaging its first two columns.

        Returns:
            A ``(voc_stft, feats)`` tuple: the magnitude of ``utils.stft``
            clipped to ``[0.0, 1.0]``, and the result of
            ``utils.stft_to_feats`` called with the signal and the sample
            rate reported by the loader.
        """
        samples, rate = librosa.core.load(file_name, sr=config.fs)
        samples = np.float64(samples)

        if samples.ndim != 2:
            mono = np.array(samples)
        else:
            # Mean of the first two columns.
            mono = np.array((samples[:, 1] + samples[:, 0]) / 2)

        magnitude = abs(utils.stft(mono))
        feats = utils.stft_to_feats(mono, rate)

        return np.clip(magnitude, 0.0, 1.0), feats