Example #1
# Imports assumed by this example.
from pathlib import Path
from typing import Optional

from librosa import load as librosa_load
from numpy import ndarray


def load_audio_file(file_path: Path,
                    sr: int,
                    mono: bool = True,
                    offset: float = 0.0,
                    duration: Optional[float] = None) -> ndarray:
    """Wrapper for loading audio file.

    TODO: Add handling of cases where librosa fails.

    :param file_path: File path of the audio file.
    :type file_path: pathlib.Path
    :param sr: Sampling frequency to be used. If different from the actual
               rate, the data are resampled.
    :type sr: int
    :param mono: Load file as mono? Defaults to True
    :type mono: bool, optional
    :param offset: Offset for reading the file. Defaults to 0.0.
    :type offset: float, optional
    :param duration: Duration of the returned data. Defaults to None.
    :type duration: float|None, optional
    :return: Audio data as numpy array of shape (channels x samples),
             if channels >= 2, else (samples, ).
    :rtype: numpy.ndarray
    """
    return librosa_load(path=str(file_path),
                        sr=sr,
                        mono=mono,
                        offset=offset,
                        duration=duration)[0]
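A minimal usage sketch for the wrapper above; the file name and target rate are placeholders, not taken from the original project.

# Hypothetical call: load a file as mono, resampled to 22.05 kHz.
audio = load_audio_file(Path("example.wav"), sr=22050)
print(audio.shape)  # (samples,) for a mono file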
Example #2
    def __call__(self, audiopath):
        # Load mono audio at the configured sample rate.
        audio, _ = librosa_load(audiopath, sr=self.samplerate, mono=True)
        # Compute a CENS chromagram with the configured hop length.
        chromagram = chroma_cens(y=audio,
                                 sr=self.samplerate,
                                 hop_length=self.step_size)
        # Transpose to (frames, pitch classes) and rotate the pitch-class axis by 3 bins.
        chromagram = np.swapaxes(chromagram, 0, 1)
        chromagram = np.roll(chromagram, 3, axis=1)
        return chromagram, self.get_frame_times(chromagram)
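The get_frame_times helper called above is not shown here. A minimal sketch of one plausible implementation, assuming frame indices are converted to seconds with librosa.frames_to_time (imported as frames_to_time) and the same hop length that produced the chromagram; the body is a guess, not the project's actual code.

    def get_frame_times(self, chromagram):
        # One timestamp (in seconds) per chroma frame, reusing the hop length
        # that produced the chromagram.
        return frames_to_time(np.arange(chromagram.shape[0]),
                              sr=self.samplerate,
                              hop_length=self.step_size)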
Example #3
def transform_playlist(wav_directory: str):
    # Compute several time-frequency transforms for every WAV file in the
    # directory and save each one as a JPEG image in its own subdirectory.
    wav_list = keep_files(os_listdir(wav_directory), 'wav')

    transformer = Transform()

    transforms_dir = create_sub_dir(wav_directory, 'transforms')

    transforms_sub_dirs = ['preciser_spectrogram',
                           'preciser_melspectrogram',
                           'preciser_cqt',
                           'locator_v1_cqt',
                           'locator_v2_spectrogram',
                           'locator_v2_melspectrogram']
    for sub_dir in transforms_sub_dirs:
        create_sub_dir(transforms_dir, sub_dir)

    for wav_file in wav_list:
        samples, sample_rate = librosa_load(os_path_join(wav_directory, wav_file), mono=True, sr=44100)

        preciser_spectrogram = transformer.get_spectrogram(sample_rate, samples, 10, 512)
        preciser_melspectrogram = transformer.get_melspectrogram(sample_rate, samples, 10, 256)
        preciser_cqt = transformer.get_constant_q(sample_rate, samples, 108)

        locator_v1_cqt = transformer.get_constant_q(sample_rate, samples, 108)

        locator_v2_spectrogram = transformer.get_spectrogram(sample_rate, samples, 100, 4411)
        locator_v2_melspectrogram = transformer.get_melspectrogram(sample_rate, samples, 100, 256)

        transforms = {'preciser_spectrogram': preciser_spectrogram,
                      'preciser_melspectrogram': preciser_melspectrogram,
                      'preciser_cqt': preciser_cqt,
                      'locator_v1_cqt': locator_v1_cqt,
                      'locator_v2_spectrogram': locator_v2_spectrogram,
                      'locator_v2_melspectrogram': locator_v2_melspectrogram}

        # The dict keys double as the subdirectory names for the saved images.
        for name, data in transforms.items():
            imsave(
                os_path_join(transforms_dir, name, wav_file + '.jpg'),
                np_transpose(data)
            )
            print('Saved', wav_file + '.jpg')
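The example relies on aliased imports and a couple of small project helpers that are not shown; the Transform class itself is project-specific and not sketched. Below is a minimal sketch of plausible definitions, purely to make the snippet self-contained; keep_files, create_sub_dir, and the choice of matplotlib.pyplot.imsave as the image writer are assumptions, not the project's actual code.

import os
from os import listdir as os_listdir
from os.path import join as os_path_join

from librosa import load as librosa_load
from matplotlib.pyplot import imsave  # assumption: any imsave(fname, array) backend works here
from numpy import transpose as np_transpose


def keep_files(file_names, extension):
    # Hypothetical helper: keep only files with the given extension.
    return [name for name in file_names if name.endswith('.' + extension)]


def create_sub_dir(parent, name):
    # Hypothetical helper: create a subdirectory if needed and return its path.
    path = os_path_join(parent, name)
    os.makedirs(path, exist_ok=True)
    return path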
Example #4
audio_read_flag = 0  # 1 means features must be extracted; 0 means already-extracted features are read from csv
mfcc_train_root = './mfcc_tmp/mfcc_train.npy'
mfcc_test_root = './mfcc_tmp/mfcc_test.npy'
if not os_path_exists('./mfcc_tmp/'):
    makedirs('./mfcc_tmp/')
    audio_read_flag = 1

audio_train_data = np.empty([train_pos_num + train_neg_num, 2 * parameter_num])
audio_test_data = np.empty([test_num, 2 * parameter_num])

if audio_read_flag == 1:
    audio_data1 = np.empty([train_pos_num, 2 * parameter_num])
    audio_data2 = np.empty([train_neg_num, 2 * parameter_num])
    for i in range(0, train_pos_num):
        y, sr = librosa_load(dataset_root + '/train/positive/' + str(i) +
                             '/audio.wav',
                             sr=16000)  # initial sr=16000
        mfccs = mfcc(y=y, sr=sr, n_mfcc=parameter_num)
        # Per-coefficient mean and variance over time form the feature vector.
        for j in range(0, parameter_num):
            audio_data1[i][j] = np.mean(mfccs[j][:])
            audio_data1[i][j + parameter_num] = np.var(mfccs[j][:])
    print("Audio positive read finished")

    for i in range(0, train_neg_num):
        y, sr = librosa_load(dataset_root + '/train/negative/' + str(i) +
                             '/audio.wav',
                             sr=16000)
        mfccs = mfcc(y=y, sr=sr, n_mfcc=parameter_num)
        for j in range(0, parameter_num):
            audio_data2[i][j] = np.mean(mfccs[j][:])
            audio_data2[i][j + parameter_num] = np.var(mfccs[j][:])
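The caching step that presumably follows is not shown. A minimal sketch under the assumption that the positive and negative features are stacked and saved with numpy at the paths defined above, and that the cached arrays are reloaded when audio_read_flag is 0; the stacking order and the whole else branch are guesses, not the original code.

    # Stack positive and negative feature blocks into one training matrix and cache it.
    audio_train_data = np.concatenate((audio_data1, audio_data2), axis=0)
    np.save(mfcc_train_root, audio_train_data)
else:
    # Reuse previously extracted features instead of decoding the audio again.
    audio_train_data = np.load(mfcc_train_root)
    audio_test_data = np.load(mfcc_test_root)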
Example #5
 def check_file(in_path=""):
     try:
         librosa_load(path=in_path, sr=_sr)
     except Exception as e:
         remove(in_path)
         pp("Delete: " + basename(in_path))
Example #6
    def _load_file_raw(self, pkl):
        # Load the raw audio (optionally truncated) and cache the (y, sr) pair as a pickle.
        self.y, self.sr = librosa_load(path=self.input_file, sr=self.sr, duration=self.load_duration)
        if Values.USE_PKL:
            with open(pkl, "wb") as f:
                pickle_dump([self.y, self.sr], f)