Esempio n. 1
0
    def test_shift_invariance(self, x_wav, actual_label):
        """
        Classify shifted windows of one waveform and score every prediction.

        The input is turned into MFCC features, or kept as raw samples with an
        added leading axis when the network architecture is 'wavenet'. It is
        then cut into windows of ``self.data_size`` and each window is
        classified.

        Returns a tuple ``(corrects, probs)``: ``corrects`` holds 1 or 0 per
        window depending on whether the predicted label equals
        ``actual_label``; ``probs`` holds, per window, the network output at
        the index of ``actual_label``.
        """

        uses_raw_samples = self.net_handler.nn_arch == 'wavenet'

        # feature extraction (raw samples only get a leading axis)
        if uses_raw_samples:
            features = x_wav[np.newaxis, :]
        else:
            features, _ = self.feature_extractor.extract_mfcc(
                x_wav, reduce_to_best_onset=False)

        # windowing: the step is given in frames, raw samples need it scaled
        # by the hop size
        frame_step = self.cfg_tb['shift_frame_step']
        if uses_raw_samples:
            windows = np.squeeze(
                view_as_windows(features,
                                self.data_size,
                                step=frame_step * self.feature_extractor.hop),
                axis=0)
        else:
            windows = np.squeeze(
                view_as_windows(features, self.data_size, step=frame_step),
                axis=(0, 1))

        # per-window results
        corrects, probs = [], []

        for win_idx, window in enumerate(windows):

            # classify the current window
            _, net_out, pred_label = self.net_handler.classify_sample(window)

            # output value of the actual class and hit / miss flag
            probs.append(float(net_out[0, self.class_dict[actual_label]]))
            corrects.append(int(actual_label == pred_label))

            # sample range covered by this window in the waveform
            first_frame = win_idx * frame_step
            time_s = frames_to_sample(first_frame,
                                      self.feature_params['fs'],
                                      self.feature_extractor.hop)
            time_e = frames_to_sample(
                first_frame + self.feature_params['frame_size'],
                self.feature_params['fs'],
                self.feature_extractor.hop)

            # plot the waveform section if requested
            if self.cfg_tb['enable_plot']:
                frame_title = 'frame{} actual: [{}] pred: [{}]'.format(
                    win_idx, actual_label, pred_label)
                frame_name = '{}_frame{}'.format(actual_label, win_idx)
                plot_waveform(x_wav[time_s:time_e],
                              self.feature_params['fs'],
                              title=frame_title,
                              plot_path=self.paths['shift_wavs'],
                              name=frame_name)

        # report accuracy over all windows
        print("test bench shift acc: ", np.sum(corrects) / len(corrects))

        return corrects, probs
Esempio n. 2
0
  def create_sets(self):
    """
    Cut recorded wav files into single examples and store them.

    Every file matching the dataset path and the configured file extension
    is loaded, its onsets are calculated and cleaned, and the signal is cut
    at those onsets. The onsets and every cut example are plotted, and each
    example is written as a wav file into the first wav folder.
    """

    print("wav: ", self.wav_folders)

    # all recordings with the configured extension
    recordings = glob(self.dataset_path + '*' + self.dataset_cfg['file_ext'])

    for recording in recordings:

      print("wav: ", recording)

      # only the label of the file naming is used below
      _, _, label = self.file_naming_extraction(recording, file_ext=self.dataset_cfg['file_ext'])

      # load audio at the configured sampling rate
      x, _ = librosa.load(recording, sr=self.feature_params['fs'])

      # onset detection followed by cleaning
      raw_onsets = calc_onsets(x, self.feature_params['fs'], N=self.N, hop=self.hop, adapt_frames=5, adapt_alpha=0.09, adapt_beta=0.8)
      onsets = self.clean_onsets(raw_onsets)

      # cut the examples (time=1, i.e. one second per the original comment)
      examples = self.cut_signal(x, onsets, time=1, alpha=0.4)

      # onset overview plot of the whole recording
      onset_plot_name = 'onsets_{}'.format(label)
      plot_onsets(x, self.feature_params['fs'], self.N, self.hop, onsets, title=label, plot_path=self.plot_paths['onsets'], name=onset_plot_name)

      for example_idx, example in enumerate(examples):

        # waveform plot of the single example
        example_title = '{}-{}'.format(label, example_idx)
        example_plot_name = 'example_{}-{}'.format(label, example_idx)
        plot_waveform(example, self.feature_params['fs'], title=example_title, plot_path=self.plot_paths['waveform'], name=example_plot_name)

        # store the example in the first wav folder
        out_file = '{}{}{}.wav'.format(self.wav_folders[0], label, example_idx)
        soundfile.write(out_file, example, self.feature_params['fs'], subtype=None, endian=None, format=None, closefd=True)
Esempio n. 3
0
def show_waveform_colors(x, cmaps):
  """
  Plot the waveform once per colormap and arrange the figure windows.

  Windows are placed in columns of width 600. The first three figures get
  y position 0 and every later figure gets y position 600, with the column
  wrapping around after three.
  """

  for idx, cmap in enumerate(cmaps):

    # one figure per colormap
    fig = plot_waveform(x, 16000, e=None, cmap=cmap, hop=None, onset_frames=None, title='none', xlim=None, ylim=None, plot_path=None, name='None', show_plot=False)

    # positioning: idx % 3 equals idx for the first three figures
    column = idx % 3
    y_pos = 600 if idx >= 3 else 0
    fig.canvas.manager.window.setGeometry(column * 600, y_pos, 600, 500)
Esempio n. 4
0
def showcase_wavs(cfg,
                  raw_plot=True,
                  spec_plot=True,
                  mfcc_plot=True,
                  show_plot=False):
    """
    Plot waveform, spectrogram and mfcc profiles of the showcase wavs.

    Args:
        cfg: config dict; ``cfg['feature_params']`` is copied and the copy is
            adjusted (``n_ceps_coeff=32``, ``norm_features=True``) before the
            feature extractor is created.
        raw_plot: plot the raw waveform of every wav.
        spec_plot: plot the spectrogram profile, once without and once with
            ``log_scale=True``.
        mfcc_plot: plot the mfcc profile.
        show_plot: passed on to the plot functions.
    """

    # plot path
    plot_path = '../docu/thesis/3_signal/figs/'

    # change params on a copy so the caller's config stays untouched
    feature_params = cfg['feature_params'].copy()
    feature_params['n_ceps_coeff'] = 32
    feature_params['norm_features'] = True

    # init feature extractor
    feature_extractor = FeatureExtractor(feature_params)

    # wav, anno dir
    wav_dir = '../ignore/my_recordings/showcase_wavs/'
    anno_dir = '../ignore/my_recordings/showcase_wavs/annotation/'

    # glob returns files in arbitrary order, so both listings are sorted
    # before pairing them up; otherwise a wav could be zipped with the
    # annotation of another file.
    # NOTE(review): assumes each wav has an annotation with the same base
    # name so that both sorted lists line up; confirm
    wav_files = sorted(glob(wav_dir + '*.wav'))
    anno_files = sorted(glob(anno_dir + '*.TextGrid'))

    # analyze some wavs
    for wav, anno in zip(wav_files, anno_files):

        # info
        print("\nwav: ", wav)
        print("anno: ", anno)

        # load file
        x, _ = librosa.load(wav, sr=feature_params['fs'])

        # file name without directory, cut at the first dot, plus the suffix
        # used in all titles and plot names
        tagged_name = wav.split('/')[-1].split('.')[0] + '_my'

        # raw waveform
        if raw_plot:
            plot_waveform(x,
                          feature_params['fs'],
                          anno_file=anno,
                          hop=feature_extractor.hop,
                          plot_path=plot_path,
                          name='signal_raw_' + tagged_name,
                          show_plot=show_plot)

        # spectrogram, linear and log scale (computed per plot so each call
        # receives its own array)
        if spec_plot:
            plot_spec_profile(x,
                              feature_extractor.calc_spectogram(x).T,
                              feature_params['fs'],
                              feature_extractor.N,
                              feature_extractor.hop,
                              anno_file=anno,
                              plot_path=plot_path,
                              title=tagged_name,
                              name='signal_spec-lin_' + tagged_name,
                              show_plot=show_plot)
            plot_spec_profile(x,
                              feature_extractor.calc_spectogram(x).T,
                              feature_params['fs'],
                              feature_extractor.N,
                              feature_extractor.hop,
                              log_scale=True,
                              anno_file=anno,
                              plot_path=plot_path,
                              title=tagged_name,
                              name='signal_spec-log_' + tagged_name,
                              show_plot=show_plot)

        # mfcc
        if mfcc_plot:
            mfcc, bon_pos = feature_extractor.extract_mfcc(
                x, reduce_to_best_onset=False)
            plot_mfcc_profile(x,
                              cfg['feature_params']['fs'],
                              feature_extractor.N,
                              feature_extractor.hop,
                              mfcc,
                              anno_file=anno,
                              sep_features=True,
                              bon_pos=bon_pos,
                              frame_size=cfg['feature_params']['frame_size'],
                              plot_path=plot_path,
                              name='signal_mfcc_' + tagged_name,
                              close_plot=False,
                              show_plot=show_plot)
Esempio n. 5
0
            command = mic.update_read_command()

            # check if command
            if command is not None:

                # print command
                print("command: ", command)

        # clear queue
        mic.clear_mic_queue()

    # some prints
    print("x_all: ", mic.collector.x_all.shape)
    print("e_all: ", mic.collector.e_all.shape)
    print("on_all: ", mic.collector.on_all.shape)

    # plot waveform
    plot_waveform(mic.collector.x_all,
                  cfg['feature_params']['fs'],
                  e=mic.collector.e_all * 10,
                  hop=mic.hop,
                  onset_frames=mic.collector.on_all,
                  title='input stream',
                  ylim=(-1, 1),
                  plot_path=None,
                  name='None',
                  show_plot=True)

    # save audio
    mic.save_audio_file()
Esempio n. 6
0
            # check if command
            if command is not None:

                # print command
                print("command: ", command)

        # clear queue
        mic.clear_mic_queue()

    # some prints
    print("x_all: ", mic.collector.x_all.shape)
    print("e_all: ", mic.collector.e_all.shape)
    print("on_all: ", mic.collector.on_all.shape)

    # plot waveform
    plot_waveform(mic.collector.x_all,
                  cfg['feature_params']['fs'],
                  mic.collector.e_all * 10,
                  hop,
                  mic.collector.on_all,
                  title='input stream',
                  ylim=(-1, 1),
                  plot_path=None,
                  name='None')

    # save audio
    mic.save_audio_file()

    # show plots
    plt.show()
Esempio n. 7
0
  def extract_raw_data(self, wavs, annos, n_examples, set_name=None):
    """
    Extract raw sample frames, quantized targets, labels and indices.

    Args:
      wavs: one list of wav files per class.
      annos: one list of annotation files (.TextGrid) per class, zipped with
        ``wavs``.
      n_examples: maximum number of examples taken per class.
      set_name: optional set name appended (after an underscore) to the name
        of collected wavs; the suffix is left out when it is None.

    Returns:
      Tuple ``(raw_data, label_data, target_data, index_data)``. ``raw_data``
      is stacked along axis 0 starting from an empty float64 array of shape
      (0, channel_size, raw_frame_size). ``target_data`` is stacked the same
      way from an empty int64 array of shape (0, raw_frame_size).
      ``label_data`` and ``index_data`` are lists with the label and
      label + file index of each example.
    """

    # parts to stack: starting with the empty arrays keeps shape and dtype
    # of the result defined even if no example is extracted
    raw_parts = [np.empty(shape=(0, self.channel_size, self.raw_frame_size), dtype=np.float64)]
    target_parts = [np.empty(shape=(0, self.raw_frame_size), dtype=np.int64)]
    label_data, index_data = [], []

    # extract class wavs
    for class_wavs, class_annos in zip(wavs, annos):

      # class annotation file names (label + index) in the order of class_annos
      class_annos_file_names = [l + i for _, i, l in [self.file_naming_extraction(a, file_ext='.TextGrid') for a in class_annos]]

      # number of class examples
      num_class_examples = 0

      # run through each example in class wavs
      for wav in class_wavs:

        # extract file namings
        _, file_index, label = self.file_naming_extraction(wav, file_ext=self.dataset_cfg['file_ext'])
        example_name = label + file_index

        # get annotation if available
        # NOTE(review): anno is looked up but not used further in this method
        anno = class_annos[class_annos_file_names.index(example_name)] if example_name in class_annos_file_names else None

        # load and pre-process audio
        x, wav_is_useless = self.wav_pre_processing(wav)
        if wav_is_useless:
          continue

        # print some info
        if self.verbose:
          print("wav: [{}] with label: [{}], samples=[{}], time=[{}]s".format(wav, label, len(x), len(x) / self.feature_params['fs']))

        # extract raw samples from region of energy
        raw, bon_pos = self.feature_extractor.get_best_raw_samples(x)

        # add dither and do normalization
        raw = self.wav_post_processing(raw)

        # quantize data
        t = self.feature_extractor.quantize(raw)

        # plot waveform
        if self.dataset_cfg['enable_plot']:
          plot_waveform(x, self.feature_params['fs'], bon_samples=[bon_pos, bon_pos + self.raw_frame_size], title=example_name, plot_path=self.plot_paths['waveform'], name=example_name, show_plot=False, close_plot=True)

        # collect wavs; set_name defaults to None, which must not be
        # concatenated to the name string
        if self.collect_wavs:
          wav_name = label + str(file_index)
          if set_name is not None:
            wav_name += '_' + set_name
          self.pre_wavs.append((librosa.util.normalize(x), wav_name, bon_pos / self.hop))

        # remember the example, stacking happens once at the end instead of
        # re-copying the whole container for every example
        raw_parts.append(raw[np.newaxis, :])
        target_parts.append(t)
        label_data.append(label)
        index_data.append(example_name)

        # update number of examples per class
        num_class_examples += 1

        # stop if desired examples are reached
        if num_class_examples >= n_examples:
          break

    # stack all examples along the first axis
    raw_data = np.vstack(raw_parts)
    target_data = np.vstack(target_parts)

    return raw_data, label_data, target_data, index_data
Esempio n. 8
0
    # load audio
    x, _ = librosa.load(wav, sr=16000)

    # feature extraction
    mfcc, bon_pos = feature_extractor.extract_mfcc(x, reduce_to_best_onset=False)

    print("mfcc: ", mfcc.shape)
    
    # invert mfcc
    x_hat = feature_extractor.invert_mfcc(np.squeeze(mfcc))

    # save invert mfcc
    soundfile.write(wav.split('.wav')[0] + '_inv_mfcc.wav', x_hat, 16000, subtype=None, endian=None, format=None, closefd=True)

    print("x_hat: ", x_hat.shape)

    plot_mfcc_profile(x, 16000, feature_extractor.N, feature_extractor.hop, mfcc, anno_file=anno, sep_features=True, diff_plot=False, bon_pos=bon_pos, frame_size=cfg['feature_params']['frame_size'], plot_path=wav_dir, name=wav.split('/')[-1].split('.')[0], show_plot=False, close_plot=False)
    plot_waveform(x, 16000, anno_file=anno, hop=feature_extractor.hop, title=wav.split('/')[-1].split('.')[0]+'_my', plot_path=wav_dir, name=wav.split('/')[-1].split('.')[0], show_plot=False)
    
  # random
  #x = np.random.randn(16000)
  #mfcc, bon_pos = feature_extractor.extract_mfcc(x, reduce_to_best_onset=False)
  #plot_mfcc_profile(x, 16000, feature_extractor.N, feature_extractor.hop, mfcc, bon_pos=bon_pos, name='rand', show_plot=True)






Esempio n. 9
0
    def test_noise_invariance(self, x_wav, actual_label, mu=0):
        """
        Classify noisy versions of one waveform and score every prediction.

        For each snr value in ``self.cfg_tb['snrs']`` normally distributed
        noise with mean ``mu`` is added to ``x_wav``. Its standard deviation
        is ``sqrt(p_x / 10**(snr / 10))``, where ``p_x`` is the mean signal
        power. The noisy signal is then turned into features and classified.

        Args:
            x_wav: waveform samples to be tested.
            actual_label: label the waveform belongs to; must be a key of
                ``self.class_dict``.
            mu: mean of the added noise.

        Returns:
            Tuple ``(corrects, probs)``: ``corrects`` holds 1 or 0 per snr
            value depending on whether the predicted label equals
            ``actual_label``; ``probs`` holds, per snr value, the network
            output at the index of ``actual_label``.
        """

        # init lists
        pred_label_list, probs = [], []

        # origin
        # NOTE(review): the origin plot is given paths['shift_wavs'] while
        # the noisy plots below get paths['noise_wavs']; confirm this is
        # intended
        if self.cfg_tb['enable_plot']:
            plot_waveform(x_wav,
                          self.feature_params['fs'],
                          title='origin actual: [{}]'.format(actual_label),
                          plot_path=self.paths['shift_wavs'],
                          name='{}_origin'.format(actual_label))

        # signal power does not depend on the snr, so it is computed once
        p_x_eff = x_wav @ x_wav.T / len(x_wav)

        # test model with different snr values
        for snr in self.cfg_tb['snrs']:

            # noise standard deviation for the requested snr
            sigma = np.sqrt(p_x_eff / (10**(snr / 10)))

            # noise generation
            noise = np.random.normal(mu, sigma, len(x_wav))

            # add noise
            x_noise = x_wav + noise

            # feature extraction: mfcc features, or raw samples for wavenet
            if self.net_handler.nn_arch != 'wavenet':
                x, _ = self.feature_extractor.extract_mfcc(
                    x_noise, reduce_to_best_onset=True)
            else:
                x, _ = self.feature_extractor.get_best_raw_samples(
                    x_noise, add_channel_dim=True)

            # classify
            _, o, pred_label = self.net_handler.classify_sample(x)

            # append predicted label and probs
            pred_label_list.append(pred_label)
            probs.append(float(o[0, self.class_dict[actual_label]]))

            # plot wavs
            if self.cfg_tb['enable_plot']:
                plot_waveform(x_noise,
                              self.feature_params['fs'],
                              title='snr: [{}] actual: [{}] pred: [{}]'.format(
                                  snr, actual_label, pred_label),
                              plot_path=self.paths['noise_wavs'],
                              name='{}_snr{}'.format(actual_label, snr))

        # correct list
        corrects = [int(actual_label == pred) for pred in pred_label_list]

        # print message
        print("test bench noise acc: ", np.sum(corrects) / len(corrects))

        return corrects, probs