def update_read_command(self): """ update mic """ # read chunk is_onset = self.read_mic_data() # onset was detected if is_onset: # start collection of items self.collector.start_collecting() # collection is full if self.collector.is_full(): # read out collection x_onset = self.collector.read_collection() # extract features mfcc_bon, bon_pos = self.feature_extractor.extract_mfcc(x_onset) # classify collection y_hat, label = self.classifier.classify(mfcc_bon) # plot plot_mfcc_profile( x_onset[bon_pos * self.hop:(bon_pos + self.feature_params['frame_size']) * self.hop], self.feature_params['fs'], self.N, self.hop, mfcc_bon, frame_size=self.feature_params['frame_size'], plot_path=self.plot_path, name='collect-{}_label-{}'.format( self.collector.collection_counter, label), enable_plot=self.mic_params['enable_plot']) # clear read queue self.clear_mic_queue() return label return None
def showcase_wavs(cfg, raw_plot=True, spec_plot=True, mfcc_plot=True, show_plot=False):
  """
  showcase wavs: create raw-waveform, linear/log spectrogram and mfcc
  profile plots of the showcase recordings for the thesis docu.

  Args:
    cfg: config dict, must contain 'feature_params' (fs, frame_size, ...)
    raw_plot: plot raw waveforms
    spec_plot: plot linear and log scale spectrograms
    mfcc_plot: plot mfcc profiles
    show_plot: show plots interactively in addition to saving them
  """

  # plot path
  plot_path = '../docu/thesis/3_signal/figs/'

  # change params on a copy so the caller's cfg stays untouched
  feature_params = cfg['feature_params'].copy()
  feature_params['n_ceps_coeff'] = 32
  feature_params['norm_features'] = True

  # init feature extractor
  feature_extractor = FeatureExtractor(feature_params)

  # wav, anno dir
  wav_dir, anno_dir = '../ignore/my_recordings/showcase_wavs/', '../ignore/my_recordings/showcase_wavs/annotation/'

  # analyze some wavs
  for wav, anno in zip(glob(wav_dir + '*.wav'), glob(anno_dir + '*.TextGrid')):

    # info
    print("\nwav: ", wav)
    print("anno: ", anno)

    # base name of the wav file, reused in all plot names below
    wav_name = wav.split('/')[-1].split('.')[0]

    # load file
    x, _ = librosa.load(wav, sr=feature_params['fs'])

    # raw waveform
    if raw_plot: plot_waveform(x, feature_params['fs'], anno_file=anno, hop=feature_extractor.hop, plot_path=plot_path, name='signal_raw_' + wav_name + '_my', show_plot=show_plot)

    # spectogram, linear and log scale
    if spec_plot:
      plot_spec_profile(x, feature_extractor.calc_spectogram(x).T, feature_params['fs'], feature_extractor.N, feature_extractor.hop, anno_file=anno, plot_path=plot_path, title=wav_name + '_my', name='signal_spec-lin_' + wav_name + '_my', show_plot=show_plot)
      plot_spec_profile(x, feature_extractor.calc_spectogram(x).T, feature_params['fs'], feature_extractor.N, feature_extractor.hop, log_scale=True, anno_file=anno, plot_path=plot_path, title=wav_name + '_my', name='signal_spec-log_' + wav_name + '_my', show_plot=show_plot)

    # mfcc (fs and frame_size are unchanged in the copy, so feature_params
    # is used consistently instead of reaching back into cfg)
    if mfcc_plot:
      mfcc, bon_pos = feature_extractor.extract_mfcc(x, reduce_to_best_onset=False)
      plot_mfcc_profile(x, feature_params['fs'], feature_extractor.N, feature_extractor.hop, mfcc, anno_file=anno, sep_features=True, bon_pos=bon_pos, frame_size=feature_params['frame_size'], plot_path=plot_path, name='signal_mfcc_' + wav_name + '_my', close_plot=False, show_plot=show_plot)
def extract_mfcc_data(self, wavs, annos, n_examples, set_name=None):
  """
  extract mfcc data from wav-files
  wavs must be in a 2D-array [[wavs_class1], [wavs_class2]] so that
  n_examples will work properly

  Args:
    wavs: 2D list of wav file paths, one inner list per class
    annos: 2D list of annotation file paths (.TextGrid), one inner list per class
    n_examples: maximum number of usable examples to keep per class
    set_name: dataset split name (e.g. 'train'), used in plot/collect names

  Returns:
    (mfcc_data, label_data, None, index_data): mfcc_data is
    [n x channel x feature x frame] float64, label_data the class label
    per example, index_data the 'label + file_index' id per example.
  """

  # collected feature chunks, labels and index ids
  # (chunks are stacked once at the end instead of np.vstack per example,
  #  which re-copied the whole array every iteration -> O(n^2))
  mfcc_list, label_data, index_data = [], [], []

  # extract class wavs
  for class_wavs, class_annos in zip(wavs, annos):

    # class annotation file names extraction: 'label + index' ids
    class_annos_file_names = [l + i for f, i, l in [self.file_naming_extraction(a, file_ext='.TextGrid') for a in class_annos]]

    # number of class examples
    num_class_examples = 0

    # run through each example in class wavs
    for wav in class_wavs:

      # extract file namings
      file_name, file_index, label = self.file_naming_extraction(wav, file_ext=self.dataset_cfg['file_ext'])

      # get annotation if available
      anno = class_annos[class_annos_file_names.index(label + file_index)] if label + file_index in class_annos_file_names else None

      # load and pre-process audio, skip useless files
      x, wav_is_useless = self.wav_pre_processing(wav)
      if wav_is_useless: continue

      # print some info
      if self.verbose: print("wav: [{}] with label: [{}], samples=[{}], time=[{}]s".format(wav, label, len(x), len(x) / self.feature_params['fs']))

      # extract feature vectors [m x l]
      mfcc, bon_pos = self.feature_extractor.extract_mfcc(x, reduce_to_best_onset=False)

      # collect wavs
      if self.collect_wavs: self.pre_wavs.append((librosa.util.normalize(x), label + str(file_index) + '_' + set_name, bon_pos))

      # plot mfcc features (gated by the enable_plot config flag)
      plot_mfcc_profile(x, self.feature_params['fs'], self.feature_extractor.N, self.feature_extractor.hop, mfcc, anno_file=anno, onsets=None, bon_pos=bon_pos, mient=None, minreg=None, frame_size=self.frame_size, plot_path=self.plot_paths['mfcc'], name=label + str(file_index) + '_' + set_name, enable_plot=self.dataset_cfg['enable_plot'])

      # damaged file check: skip damaged files if the filter is enabled
      if self.dataset_cfg['filter_damaged_files'] and self.detect_damaged_file(mfcc, wav): continue

      # keep the best-onset frame window of this example
      mfcc_list.append(mfcc[np.newaxis, :, :, bon_pos:bon_pos+self.frame_size])
      label_data.append(label)
      index_data.append(label + file_index)

      # update number of examples per class
      num_class_examples += 1

      # stop if desired examples are reached
      if num_class_examples >= n_examples: break

  # mfcc_data: [n x m x l]; astype keeps the float64 dtype the old
  # empty-base vstack produced, empty-shaped array if nothing was collected
  mfcc_data = np.vstack(mfcc_list).astype(np.float64) if mfcc_list else np.empty(shape=(0, self.channel_size, self.feature_size, self.frame_size), dtype=np.float64)

  return mfcc_data, label_data, None, index_data
# load audio x, _ = librosa.load(wav, sr=16000) # feature extraction mfcc, bon_pos = feature_extractor.extract_mfcc(x, reduce_to_best_onset=False) print("mfcc: ", mfcc.shape) # invert mfcc x_hat = feature_extractor.invert_mfcc(np.squeeze(mfcc)) # save invert mfcc soundfile.write(wav.split('.wav')[0] + '_inv_mfcc.wav', x_hat, 16000, subtype=None, endian=None, format=None, closefd=True) print("x_hat: ", x_hat.shape) plot_mfcc_profile(x, 16000, feature_extractor.N, feature_extractor.hop, mfcc, anno_file=anno, sep_features=True, diff_plot=False, bon_pos=bon_pos, frame_size=cfg['feature_params']['frame_size'], plot_path=wav_dir, name=wav.split('/')[-1].split('.')[0], show_plot=False, close_plot=False) plot_waveform(x, 16000, anno_file=anno, hop=feature_extractor.hop, title=wav.split('/')[-1].split('.')[0]+'_my', plot_path=wav_dir, name=wav.split('/')[-1].split('.')[0], show_plot=False) # random #x = np.random.randn(16000) #mfcc, bon_pos = feature_extractor.extract_mfcc(x, reduce_to_best_onset=False) #plot_mfcc_profile(x, 16000, feature_extractor.N, feature_extractor.hop, mfcc, bon_pos=bon_pos, name='rand', show_plot=True)