def spec_feature_extraction(wav_file):
    '''
    Extracts the mel and magnitude spectrograms from the given audio file.
    :param wav_file: the audio file to extract spectrograms from
    :returns: the mel and magnitude spectrograms of the given audio file
    '''
    mel, mag = get_spectrograms(wav_file)
    return mel, mag
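# Sketch (not part of the original code): minimal usage of spec_feature_extraction.
# It assumes the common Tacotron-style get_spectrograms helper, which returns two
# numpy arrays, mel of shape (T, n_mels) and mag of shape (T, 1 + n_fft // 2);
# demo_spec_feature_extraction and 'example.wav' are placeholder names.
def demo_spec_feature_extraction(wav_file='example.wav'):
    mel, mag = spec_feature_extraction(wav_file)
    print('mel shape: {} | mag shape: {}'.format(mel.shape, mag.shape))
    return mel, mag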
def make_one_dataset(filename, total, display=False):
    global finish
    # speaker id is the filename without its '.wav' extension
    speaker_id = filename.strip().split('/')[-1][:-4]
    mel_spec, lin_spec = get_spectrograms(filename)
    # wav = preprocess_wav(Path(filename))
    # d_mel, d_mel_slices = d_wav2spec(wav)
    print('[Processor] - processing {}/{} {} | mel: {} '.format(
        finish * WORKERS, total, speaker_id, mel_spec.shape), end='\r')
    result = {}
    result['speaker_id'] = speaker_id
    result['mel_spec'] = mel_spec
    result['lin_spec'] = lin_spec
    finish += 1
    return result
def make_one_dataset(filename, total, display=False):
    global finish
    sub_filename = filename.strip().split('/')[-1]
    # expected filename format: {speaker}_{a}_{b}_{c}.wav (four numeric fields)
    groups = re.match(r'(\d+)_(\d+)_(\d+)_(\d+)\.wav', sub_filename).groups()
    speaker_id = groups[0]
    utt_id = '_'.join(groups[1:])
    mel_spec, lin_spec = get_spectrograms(filename)
    wav = preprocess_wav(Path(filename))
    d_mel, d_mel_slices = d_wav2spec(wav)
    print('[Processor] - processing {}/{} s{}-{} | mel: {} | d_mel: {}'.format(
        finish * WORKERS, total, speaker_id, utt_id, mel_spec.shape, d_mel.shape), end='\r')
    result = {}
    result['speaker_id'] = speaker_id
    result['utt_id'] = utt_id
    result['d_mel_spec'] = d_mel
    result['d_mel_slices'] = d_mel_slices
    result['mel_spec'] = mel_spec
    result['lin_spec'] = lin_spec
    finish += 1
    return result
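# Sketch (assumption, not in the original file): the global `finish` counter and the
# `finish * WORKERS` progress estimate suggest make_one_dataset is meant to be mapped
# over files by a multiprocessing pool. A minimal driver could look like the helper
# below; make_dataset_parallel, wav_paths and out_path are hypothetical names.
from multiprocessing import Pool
from functools import partial
import pickle

def make_dataset_parallel(wav_paths, out_path, workers=4):
    # fan the per-file work out to `workers` processes; each worker process keeps its
    # own copy of the global `finish` counter, so the printed progress is approximate
    with Pool(workers) as pool:
        results = pool.map(partial(make_one_dataset, total=len(wav_paths)), wav_paths)
    # pickle the list of per-utterance dicts returned by make_one_dataset
    with open(out_path, 'wb') as f:
        pickle.dump(results, f)
    return results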
def save_mel_spectragram(wav_file, npy_dir):
    # save only the mel spectrogram, named after the wav file's basename
    mel, _ = get_spectrograms(wav_file)
    _, name = os.path.split(wav_file)
    stem, ext = os.path.splitext(name)
    np.save(os.path.join(npy_dir, stem + '.npy'), mel)
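# Sketch (assumption, not part of the original code): batch-convert every .wav under a
# directory with save_mel_spectragram above. save_all_mel_spectrograms, wav_dir and
# npy_dir are hypothetical names.
import os
import glob

def save_all_mel_spectrograms(wav_dir, npy_dir):
    os.makedirs(npy_dir, exist_ok=True)
    for wav_file in sorted(glob.glob(os.path.join(wav_dir, '*.wav'))):
        save_mel_spectragram(wav_file, npy_dir)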
def spec_feature_extraction(wav_file):
    mel, mag = get_spectrograms(wav_file)
    return mel, mag
def audio2feature():
    data_list = []
    label_list = []
    audio_list = []

    # read files
    # files = glob.glob(config['dataset_path'] + '/*/*.mp3')
    paths = glob.glob(config['dataset_path'] + '/*')
    for p in paths:  # one directory per song
        print('song_name:' + p)
        files = glob.glob(p + '/*.mp3')
        for filename in files:
            # Windows-style paths, e.g. p = '../dataset/train\\01' -> song_label = '01'
            index1 = filename.find('\\') + 1
            index2 = filename.find('\\', index1)
            song_label = p[index1:index2]
            # print('song label:{}'.format(song_label))
            print(filename)
            # the instrument class label is encoded in the filename
            if filename.find('solo') != -1:
                instru_class = filename[-11:-9]
            else:
                instru_class = filename[-6:-4]
            if int(instru_class) >= config['class']:
                print('skipping %s because its class %s is out of range' % (filename, instru_class))
                continue
            # offset=0.0 reads from the start; librosa converts the signal to mono by default
            y, sr = librosa.core.load(filename, offset=0.0, sr=config['sr'])
            # y, sr = librosa.load(filename, sr=config['sr'])
            Len = y.shape[0]
            count = 0  # segment count
            st_idx = 0
            end_idx = st_idx + config['audio_samples_frame_size']
            next_idx = st_idx + config['audio_samples_hop_length']
            # slide a fixed-size window over the waveform
            while st_idx < Len:
                # label: [song, instrument, segment index]
                label = [song_label, instru_class, count]
                label_list.append(label)
                if end_idx > Len:
                    end_idx = Len  # the last segment may be shorter; it is zero-padded below
                print(label)
                # audio segment, zero-padded up to the frame size
                audio = np.zeros(config['audio_samples_frame_size'], dtype='float32')
                audio[:end_idx - st_idx] = y[st_idx:end_idx]
                if config['pre'] == 'from hung-yi':
                    feature, _ = get_spectrograms(audio)
                else:
                    feature = get_melspec(audio, config)
                data_list.append(feature)
                audio_list.append(audio)
                # # output audio (debugging)
                # outputname = os.path.join('cut{:03d}.wav'.format(count))
                # print(outputname)
                # librosa.output.write_wav(outputname, audio, config['sr'])
                # input()
                count += 1
                st_idx = next_idx
                end_idx = st_idx + config['audio_samples_frame_size']
                next_idx = st_idx + config['audio_samples_hop_length']

    # normalization
    if config['nor'] == 'yes':
        print('starting normalize')
        if config['dataset_path'][-4:] == 'test':
            # test set: reuse the statistics saved during training
            with open(os.path.join(config['attr_path'], 'attr.pkl'), 'rb') as f:
                attr = pickle.load(f)
            mean = attr['mean']
            std = attr['std']
            print('mean, std:{},{}'.format(mean, std))
        else:
            # training set: compute per-bin statistics and save them
            data_temp = np.concatenate(data_list)
            mean = np.mean(data_temp, axis=0)  # e.g. data_temp (48440, 128) -> mean (128,)
            std = np.std(data_temp, axis=0)
            attr = {'mean': mean, 'std': std}
            with open(os.path.join(config['npy_path'], 'attr.pkl'), 'wb') as f:
                pickle.dump(attr, f)
        # normalize
        data_list = (data_list - mean) / std

    if config['pre'] == 'from hung-yi':
        print('hung-yi transpose')
        temp = []
        for val in data_list:
            val = val.T  # numpy .T, i.e. transpose
            temp.append(val)
        data_list = temp

    # save features, labels, and the raw waveform segments
    data_name = os.path.join(config['npy_path'], config['data_npy'])
    label_name = os.path.join(config['npy_path'], config['label_npy'])
    np.save(data_name, data_list)
    np.save(label_name, label_list)
    audio_name = os.path.join(config['npy_path'], config['audio_npy'])
    np.save(audio_name, audio_list)
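# Sketch (assumption, not part of the original script): normalising a new feature
# matrix with the statistics that audio2feature() pickles into attr.pkl, mirroring
# the test-set branch above. normalize_with_saved_stats is a hypothetical helper and
# attr_path plays the role of config['attr_path'].
import os
import pickle

def normalize_with_saved_stats(feature, attr_path):
    with open(os.path.join(attr_path, 'attr.pkl'), 'rb') as f:
        attr = pickle.load(f)
    # mean and std are per-frequency-bin vectors, so broadcasting normalises every frame
    return (feature - attr['mean']) / attr['std']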
import os
import sys
import glob
from collections import defaultdict

import h5py
import numpy as np

# get_spectrograms and root_dir (the LibriSpeech-style root containing
# speaker/chapter/*.flac files) are assumed to be defined earlier in this script.
h5py_path = sys.argv[1]
filename_groups = defaultdict(list)

with h5py.File(h5py_path, 'w') as f_h5:
    grps = [f_h5.create_group('train'), f_h5.create_group('test')]
    filenames = sorted(glob.glob(os.path.join(root_dir, '*/*/*.flac')))
    for filename in filenames:
        # group the utterances by speaker
        # use replace() rather than str.strip('.flac'), which strips characters, not a suffix
        speaker_id, chapter_id, segment_id = \
            filename.strip().split('/')[-1].replace('.flac', '').split('-')
        filename_groups[speaker_id].append(filename)
    for speaker_id, filenames in filename_groups.items():
        print('processing {}'.format(speaker_id))
        # all but the last utterance of each speaker go into the training group
        for filename in filenames[:-1]:
            print(filename)
            speaker_id, chapter_id, segment_id = \
                filename.strip().split('/')[-1].replace('.flac', '').split('-')
            mel_spec, lin_spec = get_spectrograms(filename)
            grps[0].create_dataset('{}/{}-{}/mel'.format(speaker_id, chapter_id, segment_id),
                                   data=mel_spec, dtype=np.float32)
            grps[0].create_dataset('{}/{}-{}/lin'.format(speaker_id, chapter_id, segment_id),
                                   data=lin_spec, dtype=np.float32)
        # the last utterance of each speaker goes into the test group
        filename = filenames[-1]
        speaker_id, chapter_id, segment_id = \
            filename.strip().split('/')[-1].replace('.flac', '').split('-')
        mel_spec, lin_spec = get_spectrograms(filename)
        grps[1].create_dataset('{}/{}-{}/mel'.format(speaker_id, chapter_id, segment_id),
                               data=mel_spec, dtype=np.float32)
        grps[1].create_dataset('{}/{}-{}/lin'.format(speaker_id, chapter_id, segment_id),
                               data=lin_spec, dtype=np.float32)
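# Sketch (assumption, not part of the original script): reading one utterance back out
# of the HDF5 file written above. The '{split}/{speaker}/{chapter}-{segment}/mel|lin'
# layout follows the create_dataset() calls; load_utterance is a hypothetical helper.
def load_utterance(h5py_path, split, speaker_id, chapter_id, segment_id):
    with h5py.File(h5py_path, 'r') as f:
        key = '{}/{}/{}-{}'.format(split, speaker_id, chapter_id, segment_id)
        mel = f[key + '/mel'][()]  # read the whole mel dataset into a numpy array
        lin = f[key + '/lin'][()]  # read the whole linear dataset
    return mel, lin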