def test_shift_invariance(self, x_wav, actual_label):
    """
    test model against shift invariance

    A window of size `self.data_size` is moved over the input and every
    window position is classified on its own.

    Args:
        x_wav: waveform samples; sliced as `x_wav[start:end]` for plotting
            and extended with a leading axis for the 'wavenet' architecture
        actual_label: expected label, used as key into `self.class_dict`

    Returns:
        tuple (corrects, probs) with one entry per window position:
        `corrects` holds 1 if the predicted label equals `actual_label`
        else 0, `probs` holds `o[0, self.class_dict[actual_label]]` of the
        classifier output as float.
    """

    # step between two window positions, given in frames
    shift_step = self.cfg_tb['shift_frame_step']

    if self.net_handler.nn_arch != 'wavenet':

        # feature extraction, then windows shifted frame-wise
        features, _ = self.feature_extractor.extract_mfcc(x_wav, reduce_to_best_onset=False)
        windows = np.squeeze(view_as_windows(features, self.data_size, step=shift_step), axis=(0, 1))

    else:

        # raw samples with a leading axis, the step is scaled by the hop size
        features = x_wav[np.newaxis, :]
        windows = np.squeeze(view_as_windows(features, self.data_size, step=shift_step * self.feature_extractor.hop), axis=0)

    # init lists
    pred_label_list, probs = [], []

    for i, window in enumerate(windows):

        # classify
        _, o, pred_label = self.net_handler.classify_sample(window)

        # append predicted label and output value of the actual class
        pred_label_list.append(pred_label)
        probs.append(float(o[0, self.class_dict[actual_label]]))

        # the slice bounds below are only needed for the plot
        if not self.cfg_tb['enable_plot']:
            continue

        # start and end of the window (presumably sample positions, see frames_to_sample)
        start_frame = i * shift_step
        time_s = frames_to_sample(start_frame, self.feature_params['fs'], self.feature_extractor.hop)
        time_e = frames_to_sample(start_frame + self.feature_params['frame_size'], self.feature_params['fs'], self.feature_extractor.hop)

        # plot waveform
        plot_waveform(x_wav[time_s:time_e], self.feature_params['fs'], title='frame{} actual: [{}] pred: [{}]'.format(i, actual_label, pred_label), plot_path=self.paths['shift_wavs'], name='{}_frame{}'.format(actual_label, i))

    # correct list
    corrects = [int(actual_label == l) for l in pred_label_list]

    # print message
    print("test bench shift acc: ", np.sum(corrects) / len(corrects))

    return corrects, probs
def create_sets(self):
    """
    cut and copy recorded wavs

    Every file matching `self.dataset_path + '*' + file_ext` is loaded,
    its onsets are calculated and cleaned, the signal is cut at those
    onsets and each cut is plotted and written as a wav file named
    `<wav_folders[0]><label><j>.wav`.
    """

    print("wav: ", self.wav_folders)

    # search pattern for the recorded files
    pattern = self.dataset_path + '*' + self.dataset_cfg['file_ext']

    for wav in glob(pattern):

        print("wav: ", wav)

        # only the label of the file naming is needed here
        _, _, label = self.file_naming_extraction(wav, file_ext=self.dataset_cfg['file_ext'])

        # sampling rate used for loading, plotting and saving
        fs = self.feature_params['fs']

        # read audio from file
        x, _ = librosa.load(wav, sr=fs)

        # onsets: calculate, then clean
        raw_onsets = calc_onsets(x, fs, N=self.N, hop=self.hop, adapt_frames=5, adapt_alpha=0.09, adapt_beta=0.8)
        onsets = self.clean_onsets(raw_onsets)

        # cut examples (time=1, one second according to the original comment)
        x_cut = self.cut_signal(x, onsets, time=1, alpha=0.4)

        # plot onsets of the whole recording
        plot_onsets(x, fs, self.N, self.hop, onsets, title=label, plot_path=self.plot_paths['onsets'], name='onsets_{}'.format(label))

        for j, xj in enumerate(x_cut):

            # plot the cut example
            plot_waveform(xj, fs, title='{}-{}'.format(label, j), plot_path=self.plot_paths['waveform'], name='example_{}-{}'.format(label, j))

            # save the cut example
            out_file = '{}{}{}.wav'.format(self.wav_folders[0], label, j)
            soundfile.write(out_file, xj, fs, subtype=None, endian=None, format=None, closefd=True)
def show_waveform_colors(x, cmaps):
    """
    waveform colors

    Plots `x` once for every colormap in `cmaps` and arranges the figure
    windows in a grid with three columns, so that any number of colormaps
    gets its own grid cell.

    Args:
        x: signal passed to `plot_waveform` (with a sampling rate of 16000)
        cmaps: iterable of colormaps, each passed as `cmap` to `plot_waveform`

    Note:
        the window is positioned with `fig.canvas.manager.window.setGeometry`,
        which presumably requires a Qt based matplotlib backend - confirm.
    """

    # grid layout: columns per row, cell pitch and window size in pixels
    n_cols, cell_pitch, win_width, win_height = 3, 600, 600, 500

    for i, cmap in enumerate(cmaps):

        # plot waveform with the current colormap
        fig = plot_waveform(x, 16000, e=None, cmap=cmap, hop=None, onset_frames=None, title='none', xlim=None, ylim=None, plot_path=None, name='None', show_plot=False)

        # positioning: a new row starts after every n_cols windows
        row, col = divmod(i, n_cols)
        fig.canvas.manager.window.setGeometry(col * cell_pitch, row * cell_pitch, win_width, win_height)
def showcase_wavs(cfg, raw_plot=True, spec_plot=True, mfcc_plot=True, show_plot=False):
    """
    showcase wavs

    Creates raw waveform, spectrogram (linear and log scale) and mfcc plots
    for the showcase recordings and their TextGrid annotations.

    Args:
        cfg: config dict, `cfg['feature_params']` is copied and the copy is
            changed to `n_ceps_coeff=32` and `norm_features=True`
        raw_plot: plot the raw waveform
        spec_plot: plot the spectrogram profiles
        mfcc_plot: plot the mfcc profile
        show_plot: passed on to the plot functions
    """

    # plot path
    plot_path = '../docu/thesis/3_signal/figs/'

    # change params on a copy, cfg stays untouched
    feature_params = cfg['feature_params'].copy()
    feature_params['n_ceps_coeff'] = 32
    feature_params['norm_features'] = True

    # init feature extractor
    feature_extractor = FeatureExtractor(feature_params)

    # wav, anno dir
    wav_dir = '../ignore/my_recordings/showcase_wavs/'
    anno_dir = '../ignore/my_recordings/showcase_wavs/annotation/'

    # glob returns files in arbitrary order, sort both lists so that the
    # pairing is deterministic (assumes one annotation per wav with a
    # matching file name - confirm)
    wavs = sorted(glob(wav_dir + '*.wav'))
    annos = sorted(glob(anno_dir + '*.TextGrid'))

    # analyze some wavs
    for wav, anno in zip(wavs, annos):

        # info
        print("\nwav: ", wav)
        print("anno: ", anno)

        # file name without directory and extension, used for titles and names
        stem = wav.split('/')[-1].split('.')[0]

        # load file
        x, _ = librosa.load(wav, sr=feature_params['fs'])

        # raw waveform
        if raw_plot:
            plot_waveform(x, feature_params['fs'], anno_file=anno, hop=feature_extractor.hop, plot_path=plot_path, name='signal_raw_' + stem + '_my', show_plot=show_plot)

        # spectogram, linear and log scale
        if spec_plot:
            plot_spec_profile(x, feature_extractor.calc_spectogram(x).T, feature_params['fs'], feature_extractor.N, feature_extractor.hop, anno_file=anno, plot_path=plot_path, title=stem + '_my', name='signal_spec-lin_' + stem + '_my', show_plot=show_plot)
            plot_spec_profile(x, feature_extractor.calc_spectogram(x).T, feature_params['fs'], feature_extractor.N, feature_extractor.hop, log_scale=True, anno_file=anno, plot_path=plot_path, title=stem + '_my', name='signal_spec-log_' + stem + '_my', show_plot=show_plot)

        # mfcc
        if mfcc_plot:
            mfcc, bon_pos = feature_extractor.extract_mfcc(x, reduce_to_best_onset=False)
            plot_mfcc_profile(x, cfg['feature_params']['fs'], feature_extractor.N, feature_extractor.hop, mfcc, anno_file=anno, sep_features=True, bon_pos=bon_pos, frame_size=cfg['feature_params']['frame_size'], plot_path=plot_path, name='signal_mfcc_' + stem + '_my', close_plot=False, show_plot=show_plot)
# ask the mic object for a command
command = mic.update_read_command()

# only act if a command was returned
if command is not None:

    # report the command
    print("command: ", command)

    # empty the mic queue
    mic.clear_mic_queue()

    # report the shapes of the collected arrays, one after the other
    for collected_name in ('x_all', 'e_all', 'on_all'):
        print(collected_name + ": ", getattr(mic.collector, collected_name).shape)

    # plot the collected input stream, e_all is scaled by 10 for the plot
    plot_waveform(
        mic.collector.x_all,
        cfg['feature_params']['fs'],
        e=mic.collector.e_all * 10,
        hop=mic.hop,
        onset_frames=mic.collector.on_all,
        title='input stream',
        ylim=(-1, 1),
        plot_path=None,
        name='None',
        show_plot=True,
    )

    # save audio
    mic.save_audio_file()
# only act if a command was returned
if command is not None:

    # report the command
    print("command: ", command)

    # empty the mic queue
    mic.clear_mic_queue()

    # report the shapes of the collected arrays, one after the other
    for collected_name in ('x_all', 'e_all', 'on_all'):
        print(collected_name + ": ", getattr(mic.collector, collected_name).shape)

    # plot the collected input stream, e_all is scaled by 10 for the plot
    # NOTE(review): e_all, hop and on_all are passed by position - confirm
    # that this matches the parameter order of plot_waveform
    plot_waveform(
        mic.collector.x_all,
        cfg['feature_params']['fs'],
        mic.collector.e_all * 10,
        hop,
        mic.collector.on_all,
        title='input stream',
        ylim=(-1, 1),
        plot_path=None,
        name='None',
    )

    # save audio
    mic.save_audio_file()

    # show plots
    plt.show()
def extract_raw_data(self, wavs, annos, n_examples, set_name=None):
    """
    raw data extraction

    Runs through the wav files of each class, pre-processes them, takes the
    raw samples returned by `feature_extractor.get_best_raw_samples`,
    post-processes and quantizes them and collects the results.

    Args:
        wavs: iterable of per-class wav file lists
        annos: iterable of per-class annotation file lists; it only limits
            the number of processed classes through `zip(wavs, annos)`,
            the annotation files themselves are not used for the raw data
        n_examples: maximum number of used examples per class
        set_name: name of the set, appended to the name of collected wavs
            when `self.collect_wavs` is set (formatted with `str.format`,
            so the default None does not raise anymore)

    Returns:
        tuple (raw_data, label_data, target_data, index_data):
        raw_data is an array stacked along the first axis starting from
        shape (0, channel_size, raw_frame_size), target_data is stacked
        from the quantized data starting from shape (0, raw_frame_size),
        label_data holds the labels and index_data `label + file_index`
        of every used example.
    """

    # containers, the empty arrays fix shape and dtype if nothing is added
    raw_chunks = [np.empty(shape=(0, self.channel_size, self.raw_frame_size), dtype=np.float64)]
    target_chunks = [np.empty(shape=(0, self.raw_frame_size), dtype=np.int64)]
    label_data, index_data = [], []

    # extract class wavs
    for class_wavs, _ in zip(wavs, annos):

        # number of class examples
        num_class_examples = 0

        # run through each example in class wavs
        for wav in class_wavs:

            # extract file namings
            _, file_index, label = self.file_naming_extraction(wav, file_ext=self.dataset_cfg['file_ext'])

            # load and pre-process audio, skip useless wavs
            x, wav_is_useless = self.wav_pre_processing(wav)
            if wav_is_useless:
                continue

            # print some info
            if self.verbose:
                print("wav: [{}] with label: [{}], samples=[{}], time=[{}]s".format(wav, label, len(x), len(x) / self.feature_params['fs']))

            # extract raw samples and their position
            raw, bon_pos = self.feature_extractor.get_best_raw_samples(x)

            # post-processing (dither and normalization according to the original comment)
            raw = self.wav_post_processing(raw)

            # quantize data
            t = self.feature_extractor.quantize(raw)

            # plot waveform
            if self.dataset_cfg['enable_plot']:
                plot_waveform(x, self.feature_params['fs'], bon_samples=[bon_pos, bon_pos + self.raw_frame_size], title=label + file_index, plot_path=self.plot_paths['waveform'], name=label + file_index, show_plot=False, close_plot=True)

            # collect wavs
            if self.collect_wavs:
                self.pre_wavs.append((librosa.util.normalize(x), '{}{}_{}'.format(label, file_index, set_name), bon_pos / self.hop))

            # add to containers, stacking is done once at the end
            raw_chunks.append(raw[np.newaxis, :])
            target_chunks.append(t)
            label_data.append(label)
            index_data.append(label + file_index)

            # update number of examples per class
            num_class_examples += 1

            # stop if desired examples are reached
            if num_class_examples >= n_examples:
                break

    return np.vstack(raw_chunks), label_data, np.vstack(target_chunks), index_data
# file name without directory and extension, used as plot name and title
wav_stem = wav.split('/')[-1].split('.')[0]

# load audio with a sampling rate of 16000
x, _ = librosa.load(wav, sr=16000)

# feature extraction
mfcc, bon_pos = feature_extractor.extract_mfcc(x, reduce_to_best_onset=False)
print("mfcc: ", mfcc.shape)

# invert mfcc
x_hat = feature_extractor.invert_mfcc(np.squeeze(mfcc))

# save invert mfcc next to the source wav
inv_mfcc_file = wav.split('.wav')[0] + '_inv_mfcc.wav'
soundfile.write(inv_mfcc_file, x_hat, 16000, subtype=None, endian=None, format=None, closefd=True)
print("x_hat: ", x_hat.shape)

# mfcc profile of the original signal
plot_mfcc_profile(
    x, 16000, feature_extractor.N, feature_extractor.hop, mfcc,
    anno_file=anno,
    sep_features=True,
    diff_plot=False,
    bon_pos=bon_pos,
    frame_size=cfg['feature_params']['frame_size'],
    plot_path=wav_dir,
    name=wav_stem,
    show_plot=False,
    close_plot=False,
)

# waveform of the original signal
plot_waveform(
    x, 16000,
    anno_file=anno,
    hop=feature_extractor.hop,
    title=wav_stem + '_my',
    plot_path=wav_dir,
    name=wav_stem,
    show_plot=False,
)

# disabled: same mfcc plot with a random signal
#x = np.random.randn(16000)
#mfcc, bon_pos = feature_extractor.extract_mfcc(x, reduce_to_best_onset=False)
#plot_mfcc_profile(x, 16000, feature_extractor.N, feature_extractor.hop, mfcc, bon_pos=bon_pos, name='rand', show_plot=True)
def test_noise_invariance(self, x_wav, actual_label, mu=0):
    """
    test model against noise invariance

    For every snr value in `self.cfg_tb['snrs']` gaussian noise is added to
    the input and the noisy signal is classified. The standard deviation of
    the noise is `sqrt(p_x / 10**(snr / 10))` with the signal power `p_x`.

    Args:
        x_wav: waveform samples (used with `@`, `.T` and `len`)
        actual_label: expected label, used as key into `self.class_dict`
        mu: mean of the gaussian noise

    Returns:
        tuple (corrects, probs) with one entry per snr value:
        `corrects` holds 1 if the predicted label equals `actual_label`
        else 0, `probs` holds `o[0, self.class_dict[actual_label]]` of the
        classifier output as float.
    """

    # origin
    # NOTE(review): the origin plot goes to the 'shift_wavs' path while the
    # noisy plots go to 'noise_wavs' - confirm that this is intended
    if self.cfg_tb['enable_plot']:
        plot_waveform(x_wav, self.feature_params['fs'], title='origin actual: [{}]'.format(actual_label), plot_path=self.paths['shift_wavs'], name='{}_origin'.format(actual_label))

    # signal power, does not depend on the snr and is therefore computed once
    p_x_eff = x_wav @ x_wav.T / len(x_wav)

    # init lists
    pred_label_list, probs = [], []

    # test model with different snr values
    for snr in self.cfg_tb['snrs']:

        # standard deviation of the noise for the requested snr
        sigma = np.sqrt(p_x_eff / (10**(snr / 10)))

        # noise generation and noisy signal
        n = np.random.normal(mu, sigma, len(x_wav))
        x_noise = x_wav + n

        # feature extraction, 'wavenet' gets raw samples instead of mfcc
        if self.net_handler.nn_arch != 'wavenet':
            x, _ = self.feature_extractor.extract_mfcc(x_noise, reduce_to_best_onset=True)
        else:
            x, _ = self.feature_extractor.get_best_raw_samples(x_noise, add_channel_dim=True)

        # classify
        _, o, pred_label = self.net_handler.classify_sample(x)

        # append predicted label and probs
        pred_label_list.append(pred_label)
        probs.append(float(o[0, self.class_dict[actual_label]]))

        # plot wavs
        if self.cfg_tb['enable_plot']:
            plot_waveform(x_noise, self.feature_params['fs'], title='snr: [{}] actual: [{}] pred: [{}]'.format(snr, actual_label, pred_label), plot_path=self.paths['noise_wavs'], name='{}_snr{}'.format(actual_label, snr))

    # correct list
    corrects = [int(actual_label == l) for l in pred_label_list]

    # print message
    print("test bench noise acc: ", np.sum(corrects) / len(corrects))

    return corrects, probs