Exemple #1
0
    def update_read_command(self):
        """
        Read one mic chunk and classify the collection once it is complete.

        An onset reported by read_mic_data() starts the collector. When the
        collector reports a full collection, the collected samples are
        turned into mfcc features, classified, plotted and the mic queue is
        cleared.

        Returns:
            the label returned by the classifier if a full collection was
            processed during this call, otherwise None.
        """

        # read chunk, an onset starts the collection of items
        if self.read_mic_data():
            self.collector.start_collecting()

        # nothing to classify as long as the collection is not full
        if not self.collector.is_full():
            return None

        # read out collection and extract its features
        collected = self.collector.read_collection()
        features, onset_frame = self.feature_extractor.extract_mfcc(collected)

        # classify collection, only the label is used further on
        _, label = self.classifier.classify(features)

        # sample range handed to the plot: frame indices scaled by the hop
        frame_size = self.feature_params['frame_size']
        first_sample = onset_frame * self.hop
        last_sample = (onset_frame + frame_size) * self.hop

        # plot
        plot_name = 'collect-{}_label-{}'.format(
            self.collector.collection_counter, label)
        plot_mfcc_profile(
            collected[first_sample:last_sample],
            self.feature_params['fs'],
            self.N,
            self.hop,
            features,
            frame_size=frame_size,
            plot_path=self.plot_path,
            name=plot_name,
            enable_plot=self.mic_params['enable_plot'])

        # clear read queue
        self.clear_mic_queue()

        return label
Exemple #2
0
def showcase_wavs(cfg,
                  raw_plot=True,
                  spec_plot=True,
                  mfcc_plot=True,
                  show_plot=False):
    """
    Plot waveform, spectrogram and mfcc profiles of the showcase wavs.

    Every '*.wav' file in the showcase directory is paired with a
    '*.TextGrid' file from the annotation directory. Both listings are
    sorted before pairing, because glob returns files in arbitrary order
    and zipping two unsorted listings can attach an annotation to the
    wrong recording. The pairing therefore assumes that wav and annotation
    files sort into the same order (e.g. share base names); zip stops at
    the shorter listing.

    Args:
        cfg: config mapping; cfg['feature_params'] is copied and the copy
            is overridden with n_ceps_coeff=32 and norm_features=True
            before the feature extractor is created.
        raw_plot: plot the raw waveform of each wav.
        spec_plot: plot the spectrogram of each wav, once with default
            scaling and once with log_scale=True.
        mfcc_plot: plot the mfcc profile of each wav.
        show_plot: forwarded to every plot function.
    """

    # plot path
    plot_path = '../docu/thesis/3_signal/figs/'

    # change params on a copy so that cfg stays untouched
    feature_params = cfg['feature_params'].copy()
    feature_params['n_ceps_coeff'] = 32
    feature_params['norm_features'] = True

    # init feature extractor
    feature_extractor = FeatureExtractor(feature_params)

    # wav, anno dir
    wav_dir = '../ignore/my_recordings/showcase_wavs/'
    anno_dir = '../ignore/my_recordings/showcase_wavs/annotation/'

    # sorted listings give a deterministic wav / annotation pairing
    wav_files = sorted(glob(wav_dir + '*.wav'))
    anno_files = sorted(glob(anno_dir + '*.TextGrid'))

    # analyze some wavs
    for wav, anno in zip(wav_files, anno_files):

        # info
        print("\nwav: ", wav)
        print("anno: ", anno)

        # file name without directory and extension, tagged with '_my'
        base_name = wav.split('/')[-1].split('.')[0] + '_my'

        # load file
        x, _ = librosa.load(wav, sr=feature_params['fs'])

        # raw waveform
        if raw_plot:
            plot_waveform(x,
                          feature_params['fs'],
                          anno_file=anno,
                          hop=feature_extractor.hop,
                          plot_path=plot_path,
                          name='signal_raw_' + base_name,
                          show_plot=show_plot)

        # spectogram, default scaling and log scaling
        if spec_plot:
            plot_spec_profile(x,
                              feature_extractor.calc_spectogram(x).T,
                              feature_params['fs'],
                              feature_extractor.N,
                              feature_extractor.hop,
                              anno_file=anno,
                              plot_path=plot_path,
                              title=base_name,
                              name='signal_spec-lin_' + base_name,
                              show_plot=show_plot)
            plot_spec_profile(x,
                              feature_extractor.calc_spectogram(x).T,
                              feature_params['fs'],
                              feature_extractor.N,
                              feature_extractor.hop,
                              log_scale=True,
                              anno_file=anno,
                              plot_path=plot_path,
                              title=base_name,
                              name='signal_spec-log_' + base_name,
                              show_plot=show_plot)

        # mfcc over the whole signal, best onset position is only marked
        if mfcc_plot:
            mfcc, bon_pos = feature_extractor.extract_mfcc(
                x, reduce_to_best_onset=False)
            plot_mfcc_profile(x,
                              cfg['feature_params']['fs'],
                              feature_extractor.N,
                              feature_extractor.hop,
                              mfcc,
                              anno_file=anno,
                              sep_features=True,
                              bon_pos=bon_pos,
                              frame_size=cfg['feature_params']['frame_size'],
                              plot_path=plot_path,
                              name='signal_mfcc_' + base_name,
                              close_plot=False,
                              show_plot=show_plot)
Exemple #3
0
  def extract_mfcc_data(self, wavs, annos, n_examples, set_name=None):
    """
    extract mfcc data from wav-files
    wavs must be in a 2D-array [[wavs_class1], [wavs_class2]] so that n_examples will work properly

    Args:
      wavs: wav files grouped per class, iterated in lockstep with annos.
      annos: annotation (.TextGrid) files grouped per class; a wav gets the
        annotation whose label + index matches its own, otherwise None.
      n_examples: a class is left as soon as this many examples of it were
        added (checked after each added example).
      set_name: optional tag appended as '_<set_name>' to the plot and
        collected-wav names; with None no suffix is appended.

    Returns:
      tuple (mfcc_data, label_data, None, index_data): mfcc_data has shape
      [n x channel_size x feature_size x frame_size], label_data holds the
      label and index_data the label + file index of every added example.
    """

    # empty container fixing shape and dtype of the result, labels and index
    mfcc_data = np.empty(shape=(0, self.channel_size, self.feature_size, self.frame_size), dtype=np.float64)
    mfcc_slices, label_data, index_data = [], [], []

    # name suffix, set_name is optional and must not be concatenated if None
    name_suffix = '' if set_name is None else '_' + set_name

    # extract class wavs
    for class_wavs, class_annos in zip(wavs, annos):

      # class annotations by label + index, first annotation wins on duplicates
      anno_by_name = {}
      for class_anno in class_annos:
        _, anno_index, anno_label = self.file_naming_extraction(class_anno, file_ext='.TextGrid')
        anno_by_name.setdefault(anno_label + anno_index, class_anno)

      # number of class examples
      num_class_examples = 0

      # run through each example in class wavs
      for wav in class_wavs:

        # extract file namings
        _, file_index, label = self.file_naming_extraction(wav, file_ext=self.dataset_cfg['file_ext'])

        # get annotation if available
        anno = anno_by_name.get(label + file_index)

        # load and pre-process audio
        x, wav_is_useless = self.wav_pre_processing(wav)
        if wav_is_useless: continue

        # print some info
        if self.verbose: print("wav: [{}] with label: [{}], samples=[{}], time=[{}]s".format(wav, label, len(x), len(x) / self.feature_params['fs']))

        # extract feature vectors over the whole signal
        mfcc, bon_pos = self.feature_extractor.extract_mfcc(x, reduce_to_best_onset=False)

        # name used for the collected wav and the plot
        example_name = label + str(file_index) + name_suffix

        # collect wavs
        if self.collect_wavs: self.pre_wavs.append((librosa.util.normalize(x), example_name, bon_pos))

        # plot mfcc features
        plot_mfcc_profile(x, self.feature_params['fs'], self.feature_extractor.N, self.feature_extractor.hop, mfcc, anno_file=anno, onsets=None, bon_pos=bon_pos, mient=None, minreg=None, frame_size=self.frame_size, plot_path=self.plot_paths['mfcc'], name=example_name, enable_plot=self.dataset_cfg['enable_plot'])

        # damaged file check
        if self.dataset_cfg['filter_damaged_files'] and self.detect_damaged_file(mfcc, wav): continue

        # keep frame_size frames starting at the best onset position
        mfcc_slices.append(mfcc[np.newaxis, :, :, bon_pos:bon_pos+self.frame_size])
        label_data.append(label)
        index_data.append(label + file_index)

        # update number of examples per class
        num_class_examples += 1

        # stop if desired examples are reached
        if num_class_examples >= n_examples: break

    # stack once at the end instead of re-copying the container per example
    if mfcc_slices: mfcc_data = np.vstack([mfcc_data] + mfcc_slices)

    return mfcc_data, label_data, None, index_data
    # load audio
    x, _ = librosa.load(wav, sr=16000)

    # feature extraction
    mfcc, bon_pos = feature_extractor.extract_mfcc(x, reduce_to_best_onset=False)

    print("mfcc: ", mfcc.shape)
    
    # invert mfcc
    x_hat = feature_extractor.invert_mfcc(np.squeeze(mfcc))

    # save invert mfcc
    soundfile.write(wav.split('.wav')[0] + '_inv_mfcc.wav', x_hat, 16000, subtype=None, endian=None, format=None, closefd=True)

    print("x_hat: ", x_hat.shape)

    plot_mfcc_profile(x, 16000, feature_extractor.N, feature_extractor.hop, mfcc, anno_file=anno, sep_features=True, diff_plot=False, bon_pos=bon_pos, frame_size=cfg['feature_params']['frame_size'], plot_path=wav_dir, name=wav.split('/')[-1].split('.')[0], show_plot=False, close_plot=False)
    plot_waveform(x, 16000, anno_file=anno, hop=feature_extractor.hop, title=wav.split('/')[-1].split('.')[0]+'_my', plot_path=wav_dir, name=wav.split('/')[-1].split('.')[0], show_plot=False)
    
  # random
  #x = np.random.randn(16000)
  #mfcc, bon_pos = feature_extractor.extract_mfcc(x, reduce_to_best_onset=False)
  #plot_mfcc_profile(x, 16000, feature_extractor.N, feature_extractor.hop, mfcc, bon_pos=bon_pos, name='rand', show_plot=True)