def extract_features(self, group):
    """Compute acoustic features for every audio file of *group* and save
    them to disk in HDF5 format.

    Files are read from ``self.audio_dir/<group>`` and written to
    ``self.feat_dir/<group>/<show>.h5`` using the extractor settings held
    on the instance. Files that sidekit fails to process (RuntimeError)
    are skipped and reported via logging.
    """
    assert group in ["enroll", "test"],\
        "Invalid group name!! Choose either 'enroll', 'test'"
    audio_names = os.listdir(os.path.join(self.audio_dir, group))
    feat_dir = os.path.join(self.feat_dir, group)
    # Extractor configured entirely from the instance's settings.
    extractor = sidekit.FeaturesExtractor(
        audio_filename_structure=os.path.join(self.audio_dir, group, "{}"),
        feature_filename_structure=os.path.join(feat_dir, "{}.h5"),
        lower_frequency=self.LOWER_FREQUENCY,
        higher_frequency=self.HIGHER_FREQUENCY,
        filter_bank=self.FILTER_BANK,
        filter_bank_size=self.FILTER_BANK_SIZE,
        window_size=self.WINDOW_SIZE,
        shift=self.WINDOW_SHIFT,
        ceps_number=self.CEPS_NUMBER,
        vad=self.VAD,
        snr=self.SNR_RATIO,
        save_param=self.FEAUTRES,
        keep_all_features=True)
    # One show ID per input file, all on channel 0.
    shows = np.unique(np.hstack([audio_names]))
    channels = np.zeros_like(shows, dtype=int)
    skipped = []
    for show, channel in zip(shows, channels):
        try:
            extractor.save(show, channel)
        except RuntimeError:
            # Record the faulty file and carry on with the rest.
            logging.info("SKIPPED")
            skipped.append(show)
            continue
    logging.info("Number of skipped files: " + str(len(skipped)))
    for show in skipped:
        logging.debug(show)
def make_feature_server(dirname, frame_size):
    """Build a sidekit FeaturesServer that yields linear filter-bank
    features for the wav files under *dirname*.

    The analysis window is sized to ``2*frame_size + 1`` samples so the
    FFT produces the right number of points without zero padding — each
    frame therefore covers more of the signal than the nominal frame size.
    """
    fs = 16000
    win_seconds = (2 * frame_size + 1) / fs
    hop_seconds = 0.008
    # Extractor computing linear-scale filter banks over the full band.
    extractor = sidekit.FeaturesExtractor(
        audio_filename_structure=dirname + "/{}.wav",
        sampling_frequency=fs,
        lower_frequency=0,
        higher_frequency=fs / 2,
        filter_bank="lin",
        filter_bank_size=frame_size,
        window_size=win_seconds,
        shift=hop_seconds,
        ceps_number=20,
        pre_emphasis=0.97,
        save_param=["fb"],
        keep_all_features=False)
    return sidekit.FeaturesServer(
        features_extractor=extractor,
        sources=None,
        dataset_list=["fb"],
        keep_all_features=True)
def extractFeatures(self, group):
    """Compute acoustic parameters for the audio files inside
    ``self.BASE_DIR/audio/<group>`` and save them to disk in HDF5 format
    under ``self.BASE_DIR/feat/<group>``.

    Extractor settings come from the instance configuration:

    * lower/higher frequency (Hz) bounds of the filter bank,
    * filter-bank scale ("lin" or "log") and number of filters,
    * sliding-window size and shift (seconds),
    * number of cepstral coefficients,
    * VAD algorithm ("energy", "snr", "percentil" or "lbl") and its SNR,
    * list of parameters to save; saved parameters are concatenated in
      the order (energy, fb, cep, bnf, vad_label).

    With ``keep_all_features=True`` every frame is written regardless of
    the VAD label.
    """
    audio_files = os.listdir(os.path.join(self.BASE_DIR, "audio", group))
    feat_dir = os.path.join(self.BASE_DIR, "feat", group)
    extractor = sidekit.FeaturesExtractor(
        audio_filename_structure=os.path.join(self.BASE_DIR, "audio", group, "{}"),
        feature_filename_structure=os.path.join(feat_dir, "{}.h5"),
        lower_frequency=self.LOWER_FREQUENCY,
        higher_frequency=self.HIGHER_FREQUENCY,
        filter_bank=self.FILTER_BANK,
        filter_bank_size=self.FILTER_BANK_SIZE,
        window_size=self.WINDOW_SIZE,
        shift=self.WINDOW_SHIFT,
        ceps_number=self.CEPS_NUMBER,
        vad=self.VAD,
        snr=self.SNR_RATIO,
        save_param=self.FEAUTRES,
        keep_all_features=True)
    # One show ID per input file, every show on channel 0.
    show_list = np.unique(np.hstack([audio_files]))
    channel_list = np.zeros_like(show_list, dtype=int)
    skipped = []  # tracks files that sidekit failed to process
    for show, channel in zip(show_list, channel_list):
        try:
            extractor.save(show, channel)
        except RuntimeError:
            logging.info("SKIPPED")
            skipped.append(show)
            continue
    logging.info("Number of skipped files: " + str(len(skipped)))
    for show in skipped:
        logging.debug(show)
def extractFeature(audio_dir, feature_dir, cep='mfcc', train=True):
    '''
    Extract acoustic features for the AISHELL-2 train or test set and
    save them to *feature_dir* as HDF5 files.

    param:
        audio_dir : path to audio dir
        feature_dir : path to feature dir
        cep : cepstrum type 'mfcc' or 'plp'
        train : if True, extract feature from train set, else, from test set
    return:
        None
    '''
    # Create the feature dir (and any missing parents) if it does not exist.
    # makedirs+exist_ok is race-free, unlike the exists()/mkdir() pair.
    os.makedirs(feature_dir, exist_ok=True)
    wavlist = []
    if train:
        wav_list_file = os.getcwd() + '/log/aishell2_train.log'
    else:
        wav_list_file = os.getcwd() + '/log/aishell2_test.log'
    if os.path.exists(wav_list_file):
        with open(wav_list_file, 'r') as fobj:
            for line in fobj:
                # rstrip('\n') instead of slicing off the last character:
                # slicing corrupts the final entry when the file has no
                # trailing newline.
                wavlist.append(line.rstrip('\n'))
    else:
        # No cached list: walk the audio directory structure instead.
        wavlist = readDirStruct(audio_dir, train)
    # Show IDs plus one channel index (0) per show.
    showlist = np.asarray(wavlist)
    channellist = np.zeros_like(showlist, dtype=int)
    # Extractor: 16 kHz audio, 64 log filter banks, 20 cepstra, SNR VAD.
    extractor = sidekit.FeaturesExtractor(
        audio_filename_structure=audio_dir + '/{}.wav',
        feature_filename_structure=feature_dir + "/{}.h5",
        sampling_frequency=16000,
        lower_frequency=133.3333,
        higher_frequency=6955.4976,
        filter_bank="log",
        filter_bank_size=64,
        window_size=0.025,
        shift=0.01,
        ceps_number=20,
        vad="snr",
        snr=40,
        pre_emphasis=0.97,
        save_param=["vad", "energy", "cep", "fb"],
        feature_type=cep,
        keep_all_features=True)
    # Save the features, using half of the available cores.
    print('start extracting feature')
    extractor.save_list(show_list=showlist,
                        channel_list=channellist,
                        num_thread=cpu_count() // 2)
    print('extract feature done')
def make_feature_server():
    """Return a Sidekit FeatureServer instance for this experiment.

    Paths are read from the config keys DATA_DIR and FEAT_DIR.
    """
    data_dir = config('DATA_DIR')
    feat_dir = config('FEAT_DIR')
    # TODO: more of these settings should be derived from the config file
    # Extractor that computes features over our audio files on demand.
    extractor = sidekit.FeaturesExtractor(
        audio_filename_structure=data_dir + '{}.wav',
        feature_filename_structure=feat_dir + "{}.h5",
        sampling_frequency=None,
        lower_frequency=200,
        higher_frequency=3800,
        filter_bank="log",
        filter_bank_size=24,
        window_size=0.025,
        shift=0.01,
        ceps_number=20,
        vad="snr",
        snr=40,
        pre_emphasis=0.97,
        save_param=["vad", "energy", "cep", "fb"],
        keep_all_features=True)
    # Server layering CMVN, deltas and RASTA on top of the raw features.
    return sidekit.FeaturesServer(
        features_extractor=extractor,
        feature_filename_structure=feat_dir + "{}.h5",
        sources=None,
        dataset_list=["energy", "cep", "vad"],
        mask="[0-12]",
        feat_norm="cmvn",
        global_cmvn=None,
        dct_pca=False,
        dct_pca_config=None,
        sdc=False,
        sdc_config=None,
        delta=True,
        double_delta=True,
        delta_filter=None,
        context=None,
        traps_dct_nb=None,
        rasta=True,
        keep_all_features=True)
def extract_speaker_data(self, speaker_files):
    """Extract features for every audio file of every speaker.

    *speaker_files* maps a speaker name to the list of its audio file
    paths. Features are written under the i_vector feature directory of
    ``self.speaker_list``. Returns the parallel lists
    ``(file_names, speaker_ids)`` where each file name is
    ``"<speaker>_<basename>"`` and the matching ID is the 1-based index
    assigned to that speaker.
    """
    total_speakers = len(speaker_files.keys())
    file_names = []
    speaker_ids = []
    extractor = sidekit.FeaturesExtractor(
        audio_filename_structure="{}.wav",
        feature_filename_structure="common/data/training/i_vector/" + self.speaker_list + "/feat/{}.h5",
        sampling_frequency=None,
        lower_frequency=200,
        higher_frequency=3800,
        filter_bank="log",
        filter_bank_size=24,
        window_size=0.025,
        shift=0.01,
        ceps_number=20,
        vad="snr",
        snr=40,
        pre_emphasis=0.97,
        save_param=["energy", "cep", "vad"],
        keep_all_features=True)
    # Crawl the base and all sub folders; enumerate replaces the manual
    # speaker counter (uid == 1-based position in the dict).
    for speaker_uid, (speaker, paths) in enumerate(speaker_files.items(), start=1):
        print('Extraction progress: %d/%d' % (speaker_uid, total_speakers))
        # Extract files
        for full_path in paths:
            stem = os.path.splitext(os.path.basename(full_path))[0]
            show = speaker + '_' + stem
            extractor.save(show=show,
                           channel=0,
                           input_audio_filename=full_path,
                           output_feature_filename=None,
                           noise_file_name=None,
                           snr=10,
                           reverb_file_name=None,
                           reverb_level=-26.0)
            file_names.append(show)
            speaker_ids.append(str(speaker_uid))
    return file_names, speaker_ids
def featureExtractor(subFolder_path):
    """Return a sidekit FeaturesExtractor configured for the 8 kHz audio
    files under *subFolder_path*.

    Features are not written to disk (feature_filename_structure=None)
    and only VAD-selected frames are kept.
    """
    settings = {
        "audio_filename_structure": subFolder_path + '{}',
        "feature_filename_structure": None,  # subFolder_path + '{}.h5',
        "sampling_frequency": 8000,
        "lower_frequency": 0,
        "higher_frequency": 4000,
        "filter_bank": "log",
        "filter_bank_size": 24,
        "window_size": 0.025,
        "shift": 0.01,
        "ceps_number": 20,
        "vad": "snr",
        "snr": 40,
        "pre_emphasis": 0.97,
        "save_param": ["vad", "energy", "cep", "fb"],
        "keep_all_features": False,
    }
    return sidekit.FeaturesExtractor(**settings)
def extract_feat(args):
    """Extract features for the AISHELL-2 (iOS) wav set and save them as
    HDF5 files.

    The list of shows is read from ``log/aishell2_wavlist.log`` when it
    exists, otherwise rebuilt via ``preprocess()``. *args* must provide
    ``num_thread`` for parallel extraction.
    """
    # wav directory and feature directory
    audio_folder = '/home/zeng/zeng/aishell/aishell2/ios/data/wav'
    features_folder = '/home/zeng/zeng/aishell/aishell2/ios/data/feature'
    wavlist = []
    # Hoist the path expression instead of rebuilding it for the open().
    wav_list_log = os.getcwd() + '/log/aishell2_wavlist.log'
    if os.path.exists(wav_list_log):
        with open(wav_list_log, 'r') as fobj:
            for line in fobj:
                # rstrip('\n') instead of slicing off the last character:
                # slicing corrupts the final entry when the file has no
                # trailing newline.
                wavlist.append(line.rstrip('\n'))
    else:
        wavlist = preprocess()
    # Show IDs plus one channel index (0) per show.
    showlist = np.asarray(wavlist)
    channellist = np.zeros_like(showlist, dtype=int)
    # Extractor: 16 kHz audio, 64 log filter banks, 24 cepstra, SNR VAD.
    extractor = sidekit.FeaturesExtractor(
        audio_filename_structure=audio_folder + '/{}.wav',
        feature_filename_structure=features_folder + "/{}.h5",
        sampling_frequency=16000,
        lower_frequency=100.0,
        higher_frequency=7000.0,
        filter_bank="log",
        filter_bank_size=64,
        window_size=0.025,
        shift=0.01,
        ceps_number=24,
        vad="snr",
        snr=40,
        pre_emphasis=0.97,
        save_param=["vad", "energy", "cep", "fb"],
        keep_all_features=True)
    # save the feature
    extractor.save_list(show_list=showlist,
                        channel_list=channellist,
                        num_thread=args.num_thread)
def make_mfcc_feats(self, wavList, input_file_list, output_feat_list, nj):
    """Extract 20-coefficient cepstral features for every show in
    *wavList*, reading audio from *input_file_list* and writing one HDF5
    file per entry of *output_feat_list*, with *nj* worker threads.

    Telephone-band settings: 8 kHz sampling, 20-3700 Hz log filter bank,
    percentile VAD; only VAD-selected frames are kept.
    """
    extractor = sidekit.FeaturesExtractor(
        audio_filename_structure=None,
        feature_filename_structure=None,
        sampling_frequency=8000,
        lower_frequency=20,
        higher_frequency=3700,
        filter_bank='log',
        filter_bank_size=24,
        window_size=0.025,
        shift=0.01,
        ceps_number=20,
        vad='percentil',
        snr=40,
        pre_emphasis=0.97,
        save_param=["energy", "cep", "vad"],
        keep_all_features=False)
    print("Start extracting the features")
    # One channel index (0) per show.
    channels = numpy.zeros(len(wavList), dtype='int')
    extractor.save_list(show_list=wavList,
                        channel_list=channels,
                        audio_file_list=input_file_list,
                        feature_file_list=output_feat_list,
                        num_thread=nj)
    print("extracting the features success")
) as inputFile: ubmList = inputFile.read().split('\n') nameList = [] for a in ubmList: nameList.append(a.rsplit('.')[0]) directory = '/home/adit/Desktop/DCASE2017-baseline-system-master/applications/data/TUT-acoustic-scenes-2017-development/audio/' extractor = sidekit.FeaturesExtractor( audio_filename_structure= "/home/adit/Desktop/DCASE2017-baseline-system-master/applications/data/TUT-acoustic-scenes-2017-development/audio/{}.wav", feature_filename_structure="/home/adit/Desktop/shikha_maam/Features/{}.h5", sampling_frequency=16000, lower_frequency=10, higher_frequency=8000, filter_bank="log", filter_bank_size=40, window_size=0.040, ceps_number=12, shift=0.020, vad=None, snr=None, pre_emphasis=0.97, save_param=["fb"], keep_all_features=None) show_list = np.unique([nameList]) channel_list = np.zeros_like(show_list, dtype=int) logging.info("Extract features and save to disk") extractor.save_list(show_list=show_list, channel_list=channel_list)
distribNb = 64 # number of Gaussian distributions for each GMM audioDir = 'chunks_data/all_train/' # Automatically set the number of parallel process to run. # The number of threads to run is set equal to the number of cores available # on the machine minus one or to 1 if the machine has a single core. nbThread = max(multiprocessing.cpu_count()-1, 1) extractor = sidekit.FeaturesExtractor(audio_filename_structure="chunks_data/all_train/{}", feature_filename_structure="chunks_features/all_train/{}.h5", sampling_frequency=None, lower_frequency=200, higher_frequency=4000, filter_bank="log", filter_bank_size=26, window_size=0.025, shift=0.01, ceps_number=13, # vad="snr", # snr=10, pre_emphasis=0.97, save_param=["energy", "cep", "fb"], keep_all_features=True) server = sidekit.FeaturesServer(features_extractor=extractor, feature_filename_structure="chunks_features/all_train/{}.h5", # sources=None, dataset_list=["energy", "cep", "fb"], mask="[0-12]", feat_norm="cmvn", global_cmvn=None,
input_file_list.append(filepath) output_feat_list.append(os.path.join(feat_fir, 'train', uttId + '.h5')) output_feat_lines.append( os.path.join(feat_fir, 'train', uttId + '.h5') + '\n') with open(test_feats_scp_paath, 'w') as f: f.writelines(output_feat_lines) extractor_eval = sidekit.FeaturesExtractor(audio_filename_structure=None, feature_filename_structure=None, sampling_frequency=44100, lower_frequency=20, higher_frequency=3700, filter_bank='log', filter_bank_size=24, window_size=0.025, shift=0.01, ceps_number=20, vad='energy', snr=40, pre_emphasis=0.97, save_param=["energy", "cep", "vad"], keep_all_features=False) print("Start extracting enroll features") channel_list = np.zeros(len(enroll_list), dtype='int') extractor_eval.save_list(show_list=enroll_list, channel_list=channel_list, audio_file_list=input_file_list, feature_file_list=output_feat_list, num_thread=4)
# NOTE(review): Ndx/Key appear to hold the evaluation trial index and its
# ground truth for the 3-session password task — confirm against the task
# definition.
test_ndx = sidekit.Ndx('/info/home/larcher/task/3sess-pwd_eval_m_ndx.h5')
key = sidekit.Key('/info/home/larcher/task/3sess-pwd_eval_m_key.h5')
# UBM training list: one show name per line (a trailing empty entry may
# appear if the file ends with a newline).
with open('/info/home/larcher/task/ubm_list.txt') as inputFile:
    ubmList = inputFile.read().split('\n')

# Process the audio to save MFCC on disk
logging.info("Initialize FeaturesExtractor")
# 16 kHz audio, 40 log filter banks, 19 cepstra, SNR-based VAD; only
# VAD-selected frames are written (keep_all_features=False).
extractor = sidekit.FeaturesExtractor(
    audio_filename_structure=audioDir + "/{}.wav",
    feature_filename_structure="./features/{}.h5",
    sampling_frequency=16000,
    lower_frequency=133.3333,
    higher_frequency=6955.4976,
    filter_bank="log",
    filter_bank_size=40,
    window_size=0.025,
    shift=0.01,
    ceps_number=19,
    vad="snr",
    snr=40,
    pre_emphasis=0.97,
    save_param=["vad", "energy", "cep"],
    keep_all_features=False)

# Get the complete list of features to extract: UBM shows, enrollment
# model IDs and all test segments, deduplicated by np.unique.
show_list = np.unique(
    np.hstack([ubmList,
               enroll_idmap.rightids,
               np.unique(test_ndx.segset)]))
# Every show is processed on channel 0.
channel_list = np.zeros_like(show_list, dtype=int)
logging.info("Extract features and save to disk")
def __init__(self, input_filepath_structure=None, feature_filename_structure=None, mel_feature_filename_structure=None, sampling_frequency=16000, lower_frequency=200, higher_frequency_mel=3800, filter_bank="log", filter_bank_size=24, window_size=0.025, shift=0.01, ceps_number=20, snr=40, pre_emphasis=0.97):
    """Set up two Sidekit extractors sharing the same framing settings:

    * a linear-scale filter bank covering the band up to Nyquist,
      saving ["cep", "fb"] to *feature_filename_structure*;
    * a log/mel-scale filter bank capped at *higher_frequency_mel*,
      saving ["vad", "energy", "cep", "fb"] to
      *mel_feature_filename_structure*.

    All constructor arguments are also stored as plain attributes.
    """
    # Settings common to both extractors.
    shared = dict(
        audio_filename_structure=input_filepath_structure,
        lower_frequency=lower_frequency,
        filter_bank_size=filter_bank_size,
        window_size=window_size,
        shift=shift,
        ceps_number=ceps_number,
        vad="snr",
        snr=snr,
        sampling_frequency=sampling_frequency,
        keep_all_features=True,
        pre_emphasis=pre_emphasis,
    )
    # Linear filter bank over the full band (up to sampling_frequency/2).
    self.__sk_lin_feature_extractor = sk.FeaturesExtractor(
        feature_filename_structure=feature_filename_structure,
        higher_frequency=sampling_frequency / 2,
        filter_bank="lin",
        save_param=["cep", "fb"],
        **shared,
    )
    # Log/mel filter bank limited to higher_frequency_mel.
    self.__sk_mel_feature_extractor = sk.FeaturesExtractor(
        feature_filename_structure=mel_feature_filename_structure,
        higher_frequency=higher_frequency_mel,
        filter_bank="log",
        save_param=["vad", "energy", "cep", "fb"],
        **shared,
    )
    # Keep the raw settings around for later inspection.
    self.input_filepath_structure = input_filepath_structure
    self.feature_filename_structure = feature_filename_structure
    self.mel_feature_filename_structure = mel_feature_filename_structure
    self.sampling_frequency = sampling_frequency
    self.lower_frequency = lower_frequency
    self.higher_frequency_mel = higher_frequency_mel
    self.filter_bank_size = filter_bank_size
    self.window_size = window_size
    self.shift = shift
    self.ceps_number = ceps_number
    self.snr = snr
    self.pre_emphasis = pre_emphasis
directory = os.fsencode("/home/adit/Desktop/DCASE2017-baseline-system-master/applications/data/TUT-acoustic-scenes-2017-development/audio")#"/home/adit/Desktop")# ) #os.chdir = "/home/adit/Desktop/DCASE2017-baseline-system-master/applications/data/TUT-acoustic-scenes-2017-development/audio/" for file in os.listdir(directory): filename = os.fsdecode(file) if filename.endswith(".wav"): # print(os.path.join(directory, filename)) extractor = sidekit.FeaturesExtractor(audio_filename_structure = "/home/adit/Desktop/DCASE2017-baseline-system-master/applications/data/TUT-acoustic-scenes-2017-development/audio/"+filename, feature_filename_structure = "/home/adit/Desktop/DCASE2017-baseline-system-master/applications/data/TUT-acoustic-scenes-2017-development/audio_mfec_hdf5/"+filename+".h5", sampling_frequency= 44100, lower_frequency=20, higher_frequency = 22000, filter_bank = "log", filter_bank_size = 40, window_size = 0.040, ceps_number = 12, shift = 0.020, vad = "snr", snr = 40, pre_emphasis= 0.97, save_param = ["fb"], keep_all_features = True) extractor.save("") file = h5py.File('/home/adit/Desktop/DCASE2017-baseline-system-master/applications/data/TUT-acoustic-scenes-2017-development/audio_mfec_hdf5/'+filename+'.h5','r+') data = file['/fb'] #print(data) tocsv = data[:,:] data1 = file['/fb_mean'] tomean = data1[:,]
# Collect one show name (file stem) per file under accfiles, every show
# on channel 0, then extract 8 kHz telephone-band features for all of
# them with 6 worker threads.
inputpath = "/home/aditya/Desktop/MFCCs"
show_list = [os.path.splitext(fname)[0]
             for fname in os.listdir(inputpath + "/accfiles")]
channel_list = [0] * len(show_list)
extractor = sidekit.FeaturesExtractor(
    audio_filename_structure=inputpath + "/accfiles/" + "{}.wav",
    feature_filename_structure=inputpath + "/features/" + "{}.hd5",
    sampling_frequency=8000,
    lower_frequency=200,
    higher_frequency=3800,
    filter_bank="log",
    filter_bank_size=24,
    window_size=0.02,
    shift=0.01,
    ceps_number=20,
    vad="snr",
    snr=40,
    pre_emphasis=0.97,
    save_param=["energy", "cep"],
    keep_all_features=True)
extractor.save_list(show_list=show_list,
                    channel_list=channel_list,
                    num_thread=6)