Example 1
def spec_feature_extraction(wav_file):
    '''
    Extract the mel and magnitude spectrograms from the given audio file.

    :param wav_file: the audio file to extract the spectrograms from
    :returns: the mel and magnitude spectrograms of the given audio file
    '''
    mel, mag = get_spectrograms(wav_file)
    return mel, mag
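All of the examples on this page delegate to a helper named get_spectrograms that is not shown here. Below is a minimal sketch of what it is assumed to compute (Tacotron-style log-mel and log-magnitude spectrograms returned as (frames, bins) arrays); every parameter value is illustrative, not taken from the original projects:

import librosa
import numpy as np

def get_spectrograms_sketch(wav_file, sr=22050, n_fft=1024,
                            hop_length=256, n_mels=80):
    # illustrative stand-in for get_spectrograms; all parameters are assumptions
    y, _ = librosa.load(wav_file, sr=sr)
    # linear-scale magnitude spectrogram: (1 + n_fft // 2, frames)
    mag = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
    # project onto a mel filterbank: (n_mels, frames)
    mel_basis = librosa.filters.mel(sr=sr, n_fft=n_fft, n_mels=n_mels)
    mel = np.dot(mel_basis, mag)
    # log-compress and transpose to (frames, bins), matching the shapes
    # printed by the examples below
    mel = np.log(mel + 1e-5).T.astype(np.float32)
    mag = np.log(mag + 1e-5).T.astype(np.float32)
    return mel, mag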
Example 2
def make_one_dataset(filename, total, display=False):

    global finish
    # speaker id = filename without its ".wav" extension
    speaker_id = filename.strip().split('/')[-1][:-4]
    mel_spec, lin_spec = get_spectrograms(filename)

    # wav = preprocess_wav(Path(filename))
    # d_mel, d_mel_slices = d_wav2spec(wav)

    print('[Processor] - processing {}/{} {} | mel: {} '.format(
       finish*WORKERS, total, speaker_id, mel_spec.shape), end='\r')
    result = {}
    result['speaker_id'] = speaker_id
    result['mel_spec'] = mel_spec
    result['lin_spec'] = lin_spec
    finish += 1
    return result
Example 3
def make_one_dataset(filename, total, display=False):
    global finish
    sub_filename = filename.strip().split('/')[-1]
    # filename format: four numeric fields, {speaker}_{a}_{b}_{c}.wav;
    # the last three fields form the utterance id
    groups = re.match(r'(\d+)_(\d+)_(\d+)_(\d+)\.wav', sub_filename).groups()
    speaker_id = groups[0]
    utt_id = '_'.join(groups[1:])
    mel_spec, lin_spec = get_spectrograms(filename)

    wav = preprocess_wav(Path(filename))
    d_mel, d_mel_slices = d_wav2spec(wav)

    print('[Processor] - processing {}/{} s{}-{} | mel: {} | d_mel: {}'.format(
       finish*WORKERS, total, speaker_id, utt_id, mel_spec.shape, d_mel.shape), end='\r')
    result = {}
    result['speaker_id'] = speaker_id
    result['utt_id'] = utt_id
    result['d_mel_spec'] = d_mel
    result['d_mel_slices'] = d_mel_slices
    result['mel_spec'] = mel_spec
    result['lin_spec'] = lin_spec
    finish += 1
    return result
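Examples 2 and 3 increment a global finish counter and multiply it by WORKERS when printing progress, which suggests they run inside a multiprocessing pool where each worker only sees its own count (preprocess_wav and d_wav2spec in Example 3 additionally look like speaker-embedding preprocessing in the style of Resemblyzer, but they are not shown here). A hypothetical driver, assuming WORKERS processes and an illustrative directory layout:

import glob
import pickle
from multiprocessing import Pool

WORKERS = 4   # assumption: mirrors the WORKERS global used above
finish = 0    # per-process counter incremented by make_one_dataset

if __name__ == '__main__':
    filenames = sorted(glob.glob('wavs/*.wav'))  # illustrative layout
    with Pool(WORKERS) as pool:
        results = pool.starmap(
            make_one_dataset, [(f, len(filenames)) for f in filenames])
    with open('dataset.pkl', 'wb') as f:
        pickle.dump(results, f)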
Example 4
def save_mel_spectragram(wav_file, npy_dir):
    # keep only the mel spectrogram and save it as <stem>.npy in npy_dir
    mel, _ = get_spectrograms(wav_file)
    _, name = os.path.split(wav_file)
    stem, ext = os.path.splitext(name)
    np.save(os.path.join(npy_dir, stem + '.npy'), mel)
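A possible round trip with this function (the file and directory names are placeholders, and npy_dir must already exist because the function does not create it):

import numpy as np

save_mel_spectragram('p225_001.wav', 'mels')  # placeholder paths
mel = np.load('mels/p225_001.npy')
print(mel.shape)  # (frames, n_mels)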
Example 5
def spec_feature_extraction(wav_file):
    mel, mag = get_spectrograms(wav_file)
    return mel, mag
Example 6
def audio2feature():
    data_list = []
    label_list = []
    audio_list = []

    # read files
    # files = glob.glob(config['dataset_path'] + '/*/*.mp3')
    paths = glob.glob(config['dataset_path'] + '/*')
    for p in paths:  # one directory per song
        print('song_name: ' + p)

        files = glob.glob(p + '/*.mp3')
        for filename in files:
            # the song label is the name of the song directory,
            # e.g. '01' in '../dataset/train/01'
            song_label = os.path.basename(p)
            # print('song label: {}'.format(song_label))
            print(filename)

            if filename.find('solo') != -1:
                instru_class = filename[-11:-9]  # instrument class label
            else:
                instru_class = filename[-6:-4]
            if int(instru_class) >= config['class']:
                print('skipping %s because its class is %s' %
                      (filename, instru_class))
                continue
            y, sr = librosa.core.load(
                filename, offset=0.0, sr=config['sr']
            )  # mono=True by default, so the signal is converted to mono
            # y, sr = librosa.load(filename, sr=config['sr'])

            length = y.shape[0]
            count = 0  # segment count
            st_idx = 0
            end_idx = st_idx + config['audio_samples_frame_size']
            next_idx = st_idx + config['audio_samples_hop_length']

            while st_idx < length:
                # label: [song, instrument, segment index]
                label = [song_label, instru_class, count]
                label_list.append(label)
                if end_idx > length:
                    end_idx = length  # the last segment may be shorter
                    print(label)  # [song, instrument, count]

                # audio: zero-pad the segment to a fixed frame size
                audio = np.zeros(config['audio_samples_frame_size'],
                                 dtype='float32')
                audio[:end_idx - st_idx] = y[st_idx:end_idx]

                if config['pre'] == 'from hung-yi':
                    feature, _ = get_spectrograms(audio)
                else:
                    feature = get_melspec(audio, config)
                data_list.append(feature)
                audio_list.append(audio)
                '''
                # debug: write each segment out as a wav file and pause
                outputname = os.path.join('cut{:03d}.wav'.format(count))
                print(outputname)
                librosa.output.write_wav(outputname, audio, config['sr'])
                input()
                '''

                count += 1
                st_idx = next_idx
                end_idx = st_idx + config['audio_samples_frame_size']
                next_idx = st_idx + config['audio_samples_hop_length']

    # normalization
    if config['nor'] == 'yes':
        print('starting normalization')
        if config['dataset_path'][-4:] == 'test':
            # the test set reuses the mean/std computed on the training set
            with open(os.path.join(config['attr_path'], 'attr.pkl'),
                      'rb') as f:
                attr = pickle.load(f)
            mean = attr['mean']
            std = attr['std']
            print('mean, std: {}, {}'.format(mean, std))
        else:
            data_temp = np.concatenate(data_list)
            mean = np.mean(data_temp, axis=0)
            std = np.std(data_temp, axis=0)
            # e.g. data_temp shape: (48440, 128), mean shape: (128,)
            attr = {'mean': mean, 'std': std}
            with open(os.path.join(config['npy_path'], 'attr.pkl'), 'wb') as f:
                pickle.dump(attr, f)

        # apply the normalization (NumPy broadcasts over the stacked segments)
        data_list = (data_list - mean) / std

    if config['pre'] == 'from hung-yi':
        print('hung-yi transpose')
        # transpose each feature (swap the time and frequency axes)
        data_list = [val.T for val in data_list]

    # save
    data_name = os.path.join(config['npy_path'], config['data_npy'])
    label_name = os.path.join(config['npy_path'], config['label_npy'])
    np.save(data_name, data_list)
    np.save(label_name, label_list)
    # raw audio segments
    audio_name = os.path.join(config['npy_path'], config['audio_npy'])
    np.save(audio_name, audio_list)
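get_melspec, the branch above not taken when config['pre'] is 'from hung-yi', is not shown on this page. Given that the saved statistics have shape (48440, 128), i.e. frames stacked over 128 mel bins, a plausible sketch (the config keys other than 'sr' are assumptions):

import librosa
import numpy as np

def get_melspec(audio, config):
    # (n_mels, frames) power mel spectrogram of the raw segment
    mel = librosa.feature.melspectrogram(
        y=audio, sr=config['sr'],
        n_fft=config.get('n_fft', 2048),           # assumed key
        hop_length=config.get('hop_length', 512),  # assumed key
        n_mels=config.get('n_mels', 128))          # assumed key
    # log-compress and transpose to (frames, n_mels) so that
    # np.concatenate(data_list) stacks frames along axis 0 as above
    return np.log(mel + 1e-9).T.astype('float32')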
Example 7
    h5py_path = sys.argv[1]
    filename_groups = defaultdict(list)
    with h5py.File(h5py_path, 'w') as f_h5:
        grps = [f_h5.create_group('train'), f_h5.create_group('test')]
        filenames = sorted(glob.glob(os.path.join(root_dir, '*/*/*.flac')))
        for filename in filenames:
            # group the utterances by speaker
            speaker_id = os.path.basename(filename).split('-')[0]
            filename_groups[speaker_id].append(filename)
        for speaker_id, filenames in filename_groups.items():
            print('processing {}'.format(speaker_id))
            for filename in filenames[:-1]:
                print(filename)
                speaker_id, chapter_id, segment_id = os.path.splitext(
                    os.path.basename(filename))[0].split('-')
                mel_spec, lin_spec = get_spectrograms(filename)
                grps[0].create_dataset('{}/{}-{}/mel'.format(speaker_id, chapter_id, segment_id), \
                    data=mel_spec, dtype=np.float32)
                grps[0].create_dataset('{}/{}-{}/lin'.format(speaker_id, chapter_id, segment_id),\
                    data=lin_spec, dtype=np.float32)
            # the last utterance of each speaker goes into the test set
            filename = filenames[-1]
            speaker_id, chapter_id, segment_id = os.path.splitext(
                os.path.basename(filename))[0].split('-')
            mel_spec, lin_spec = get_spectrograms(filename)
            grps[1].create_dataset('{}/{}-{}/mel'.format(speaker_id, chapter_id, segment_id), \
                data=mel_spec, dtype=np.float32)
            grps[1].create_dataset('{}/{}-{}/lin'.format(speaker_id, chapter_id, segment_id), \
                data=lin_spec, dtype=np.float32)
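For completeness, a sketch of reading one utterance back out of the resulting HDF5 file (the file name and IDs are placeholders; the layout mirrors the datasets created above, i.e. <split>/<speaker>/<chapter>-<segment>/{mel,lin}):

import h5py

with h5py.File('librispeech.h5', 'r') as f_h5:
    mel = f_h5['train/19/198-0001/mel'][:]
    lin = f_h5['train/19/198-0001/lin'][:]
    print(mel.shape, lin.shape)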