def _create_subviews(path, corpus):
    """
    Load the subviews based on testing_list.txt and validation_list.txt.
    """
    test_list_path = os.path.join(path, 'testing_list.txt')
    dev_list_path = os.path.join(path, 'validation_list.txt')

    test_list = textfile.read_separated_lines(test_list_path, separator='/', max_columns=2)
    dev_list = textfile.read_separated_lines(dev_list_path, separator='/', max_columns=2)

    # Utterance-ids are built as '<filename-without-extension>_<folder>'.
    test_set = {'{}_{}'.format(os.path.splitext(x[1])[0], x[0]) for x in test_list}
    dev_set = {'{}_{}'.format(os.path.splitext(x[1])[0], x[0]) for x in dev_list}

    # Everything that is neither in the test- nor in the dev-list is training data,
    # hence the inverse filter over the union of both sets.
    inv_train_set = test_set.union(dev_set)
    train_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs=inv_train_set, inverse=True)
    train_view = subview.Subview(corpus, filter_criteria=train_filter)
    corpus.import_subview('train', train_view)

    dev_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs=dev_set, inverse=False)
    dev_view = subview.Subview(corpus, filter_criteria=dev_filter)
    corpus.import_subview('dev', dev_view)

    test_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs=test_set, inverse=False)
    test_view = subview.Subview(corpus, filter_criteria=test_filter)
    corpus.import_subview('test', test_view)
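# Hedged usage sketch for _create_subviews: `download_path` is a placeholder for
# an extracted Google Speech Commands directory containing testing_list.txt and
# validation_list.txt, and `corpus` an already loaded audiomate.Corpus over the
# same data. Neither name is part of the original code.
def _example_speech_commands_subviews(download_path, corpus):
    _create_subviews(download_path, corpus)

    # The call registers three disjoint views over the same underlying corpus.
    for name in ('train', 'dev', 'test'):
        print(name, corpus.subviews[name].num_utterances)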
def create_sample_dataset(temp_dir):
    ds = audiomate.Corpus(str(temp_dir))

    file_1_path = resources.sample_wav_file('wav_1.wav')
    file_2_path = resources.sample_wav_file('wav_2.wav')
    file_3_path = resources.get_resource_path(['audio_formats', 'flac_1_16k_16b.flac'])

    file_1 = ds.new_file(file_1_path, track_idx='wav_1')
    file_2 = ds.new_file(file_2_path, track_idx='wav_2')
    file_3 = ds.new_file(file_3_path, track_idx='wav_3')

    issuer_1 = Speaker('spk-1', gender=Gender.MALE)
    issuer_2 = Speaker('spk-2', gender=Gender.FEMALE)
    issuer_3 = Issuer('spk-3')

    ds.import_issuers([issuer_1, issuer_2, issuer_3])

    # duration 2.5951875
    utt_1 = ds.new_utterance('utt-1', file_1.idx, issuer_idx=issuer_1.idx)
    utt_2 = ds.new_utterance('utt-2', file_2.idx, issuer_idx=issuer_2.idx, start=0, end=1.5)
    utt_3 = ds.new_utterance('utt-3', file_2.idx, issuer_idx=issuer_2.idx, start=1.5, end=2.5)
    # duration 5.0416875
    utt_4 = ds.new_utterance('utt-4', file_3.idx, issuer_idx=issuer_3.idx)

    utt_1.set_label_list(LabelList(audiomate.corpus.LL_WORD_TRANSCRIPT,
                                   labels=[Label('who am i')]))
    utt_2.set_label_list(LabelList(audiomate.corpus.LL_WORD_TRANSCRIPT,
                                   labels=[Label('who are you')]))
    utt_3.set_label_list(LabelList(audiomate.corpus.LL_WORD_TRANSCRIPT,
                                   labels=[Label('who is he')]))
    utt_4.set_label_list(LabelList(audiomate.corpus.LL_WORD_TRANSCRIPT,
                                   labels=[Label('who are they')]))

    train_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs={'utt-1', 'utt-2', 'utt-3'})
    sv_train = subview.Subview(ds, filter_criteria=[train_filter])

    dev_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs={'utt-4'})
    sv_dev = subview.Subview(ds, filter_criteria=[dev_filter])

    ds.import_subview('train', sv_train)
    ds.import_subview('dev', sv_dev)

    return ds
def _load(self, path):
    corpus = audiomate.Corpus(path=path)

    for part in SUBSETS:
        sub_path = os.path.join(path, part)
        ids = self.get_ids_from_folder(sub_path)
        utt_ids = []

        for idx in ids:
            add_ids = self.load_file(sub_path, idx, corpus)
            utt_ids.extend(add_ids)

        subview_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs=utt_ids)
        subview_corpus = subview.Subview(corpus, filter_criteria=[subview_filter])
        corpus.import_subview(part, subview_corpus)

        # Per-part wav-type subviews, e.g. 'train_<wavtype>'.
        TudaReader.create_wav_type_subviews(corpus, utt_ids, prefix='{}_'.format(part))

    # Wav-type subviews over the full corpus, without a part prefix.
    TudaReader.create_wav_type_subviews(corpus, corpus.utterances.keys())

    return corpus
def test_match(self):
    utt_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs={'a', 'b', 'd'})

    self.assertTrue(utt_filter.match(assets.Utterance('a', 'x'), None))
    self.assertTrue(utt_filter.match(assets.Utterance('b', 'x'), None))
    self.assertTrue(utt_filter.match(assets.Utterance('d', 'x'), None))

    self.assertFalse(utt_filter.match(assets.Utterance('c', 'x'), None))
    self.assertFalse(utt_filter.match(assets.Utterance('e', 'x'), None))
def test_match_inverse(self):
    utt_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs={'a', 'b', 'd'}, inverse=True)

    assert not utt_filter.match(tracks.Utterance('a', 'x'), None)
    assert not utt_filter.match(tracks.Utterance('b', 'x'), None)
    assert not utt_filter.match(tracks.Utterance('d', 'x'), None)

    assert utt_filter.match(tracks.Utterance('c', 'x'), None)
    assert utt_filter.match(tracks.Utterance('e', 'x'), None)
def test_match(self):
    utt_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs={'a', 'b', 'd'})

    assert utt_filter.match(tracks.Utterance('a', 'x'), None)
    assert utt_filter.match(tracks.Utterance('b', 'x'), None)
    assert utt_filter.match(tracks.Utterance('d', 'x'), None)

    assert not utt_filter.match(tracks.Utterance('c', 'x'), None)
    assert not utt_filter.match(tracks.Utterance('e', 'x'), None)
def create_wav_type_subviews(corpus, utt_ids, prefix=''):
    splits = collections.defaultdict(list)

    for utt_id in utt_ids:
        # The wav type is the last underscore-separated token of the utterance-id.
        wavtype = utt_id.split('_')[-1]
        splits[wavtype].append(utt_id)

    for sub_name, sub_utts in splits.items():
        subview_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs=sub_utts)
        subview_corpus = subview.Subview(corpus, filter_criteria=[subview_filter])
        corpus.import_subview('{}{}'.format(prefix, sub_name), subview_corpus)
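# Hedged example for create_wav_type_subviews: since the wav type is the last
# underscore-separated token of the utterance-id, ids like the made-up ones
# below land in subviews named '<prefix><token>'.
def _example_wav_type_subviews(corpus):
    utt_ids = ['rec1_Kinect-RAW', 'rec2_Kinect-RAW', 'rec1_Yamaha']
    create_wav_type_subviews(corpus, utt_ids, prefix='train_')
    # Result: subview 'train_Kinect-RAW' with two utterances and
    # 'train_Yamaha' with one.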
def test_import_subview(self):
    train_set = subview.Subview(None, filter_criteria=[
        subview.MatchingUtteranceIdxFilter(utterance_idxs={'existing_utt'})
    ])

    self.corpus.import_subview('train', train_set)

    assert self.corpus.num_subviews == 1
    assert self.corpus.subviews['train'] == train_set
    assert self.corpus.subviews['train'].corpus == self.corpus
def _subviews_from_utterance_splits(self, splits):
    """
    Create subviews from a dict containing utterance-ids for each subview.

    e.g. {'train': ['utt-1', 'utt-2'], 'test': [...], ...}
    """
    subviews = {}

    for idx, subview_utterances in splits.items():
        utt_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs=subview_utterances)
        split = subview.Subview(self.corpus, filter_criteria=utt_filter)
        subviews[idx] = split

    return subviews
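# Hedged usage sketch for _subviews_from_utterance_splits: `splitter` stands in
# for whatever object exposes this method together with a `corpus` attribute;
# both names are placeholders, not part of the original code.
def _example_import_split_subviews(splitter):
    splits = {'train': ['utt-1', 'utt-2'], 'test': ['utt-3']}

    for name, sv in splitter._subviews_from_utterance_splits(splits).items():
        splitter.corpus.import_subview(name, sv)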
def _load(self, path):
    corpus = audiomate.Corpus(path=path)

    for part in SUBSETS:
        sub_path = os.path.join(path, part)
        ids = TudaReader.get_ids_from_folder(sub_path, part)

        for idx in ids:
            TudaReader.load_file(sub_path, idx, corpus)

        subview_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs=ids)
        subview_corpus = subview.Subview(corpus, filter_criteria=[subview_filter])
        corpus.import_subview(part, subview_corpus)

    return corpus
def create_multi_label_corpus():
    ds = audiomate.Corpus()

    wav_1_path = sample_wav_file('wav_1.wav')
    wav_2_path = sample_wav_file('wav_2.wav')
    wav_3_path = sample_wav_file('wav_3.wav')
    wav_4_path = sample_wav_file('wav_4.wav')

    file_1 = ds.new_file(wav_1_path, track_idx='wav-1')
    file_2 = ds.new_file(wav_2_path, track_idx='wav_2')
    file_3 = ds.new_file(wav_3_path, track_idx='wav_3')
    file_4 = ds.new_file(wav_4_path, track_idx='wav_4')

    issuer_1 = ds.new_issuer('spk-1')
    issuer_2 = ds.new_issuer('spk-2')
    issuer_3 = ds.new_issuer('spk-3')

    utt_1 = ds.new_utterance('utt-1', file_1.idx, issuer_idx=issuer_1.idx)
    utt_2 = ds.new_utterance('utt-2', file_2.idx, issuer_idx=issuer_1.idx)
    utt_3 = ds.new_utterance('utt-3', file_3.idx, issuer_idx=issuer_2.idx, start=0, end=15)
    utt_4 = ds.new_utterance('utt-4', file_3.idx, issuer_idx=issuer_2.idx, start=15, end=25)
    utt_5 = ds.new_utterance('utt-5', file_3.idx, issuer_idx=issuer_2.idx, start=25, end=40)
    utt_6 = ds.new_utterance('utt-6', file_4.idx, issuer_idx=issuer_3.idx, start=0, end=15)
    utt_7 = ds.new_utterance('utt-7', file_4.idx, issuer_idx=issuer_3.idx, start=15, end=25)
    utt_8 = ds.new_utterance('utt-8', file_4.idx, issuer_idx=issuer_3.idx, start=25, end=40)

    utt_1.set_label_list(LabelList(labels=[
        Label('music', 0, 5),
        Label('speech', 5, 12),
        Label('music', 13, 15),
    ]))
    utt_2.set_label_list(LabelList(labels=[
        Label('music', 0, 5),
        Label('speech', 5, 12),
        Label('music', 13, 15),
    ]))
    utt_3.set_label_list(LabelList(labels=[
        Label('music', 0, 1),
        Label('speech', 2, 6),
    ]))
    utt_4.set_label_list(LabelList(labels=[
        Label('music', 0, 5),
        Label('speech', 5, 12),
        Label('music', 13, 15),
    ]))
    utt_5.set_label_list(LabelList(labels=[
        Label('speech', 0, 7),
    ]))
    utt_6.set_label_list(LabelList(labels=[
        Label('music', 0, 5),
        Label('speech', 5, 12),
        Label('music', 13, 15),
    ]))
    utt_7.set_label_list(LabelList(labels=[
        Label('music', 0, 5),
        Label('speech', 5, 11),
    ]))
    utt_8.set_label_list(LabelList(labels=[
        Label('music', 0, 10),
    ]))

    train_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs={'utt-4', 'utt-5', 'utt-6'})
    sv_train = subview.Subview(ds, filter_criteria=[train_filter])

    dev_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs={'utt-7', 'utt-8'})
    sv_dev = subview.Subview(ds, filter_criteria=[dev_filter])

    ds.import_subview('train', sv_train)
    ds.import_subview('dev', sv_dev)

    ds.new_feature_container('mfcc', '/some/dummy/path/secondmfcc')
    ds.new_feature_container('energy', '/some/dummy/path/energy')

    return ds
def create_dataset():
    temp_path = tempfile.mkdtemp()
    ds = audiomate.Corpus(temp_path)

    wav_1_path = sample_wav_file('wav_1.wav')
    wav_2_path = sample_wav_file('wav_2.wav')
    wav_3_path = sample_wav_file('wav_3.wav')
    wav_4_path = sample_wav_file('wav_4.wav')

    file_1 = ds.new_file(wav_1_path, track_idx='wav-1')
    file_2 = ds.new_file(wav_2_path, track_idx='wav_2')
    file_3 = ds.new_file(wav_3_path, track_idx='wav_3')
    file_4 = ds.new_file(wav_4_path, track_idx='wav_4')

    issuer_1 = Speaker('spk-1', gender=Gender.MALE)
    issuer_2 = Speaker('spk-2', gender=Gender.FEMALE)
    issuer_3 = Issuer('spk-3')

    ds.import_issuers([issuer_1, issuer_2, issuer_3])

    # duration 2.5951875
    utt_1 = ds.new_utterance('utt-1', file_1.idx, issuer_idx=issuer_1.idx)
    utt_2 = ds.new_utterance('utt-2', file_2.idx, issuer_idx=issuer_1.idx)
    utt_3 = ds.new_utterance('utt-3', file_3.idx, issuer_idx=issuer_2.idx, start=0, end=1.5)
    utt_4 = ds.new_utterance('utt-4', file_3.idx, issuer_idx=issuer_2.idx, start=1.5, end=2.5)
    utt_5 = ds.new_utterance('utt-5', file_4.idx, issuer_idx=issuer_3.idx)

    utt_1.set_label_list(LabelList(audiomate.corpus.LL_WORD_TRANSCRIPT,
                                   labels=[Label('who am i')]))
    utt_2.set_label_list(LabelList(audiomate.corpus.LL_WORD_TRANSCRIPT,
                                   labels=[Label('who are you', meta={'a': 'hey', 'b': 2})]))
    utt_3.set_label_list(LabelList(audiomate.corpus.LL_WORD_TRANSCRIPT,
                                   labels=[Label('who is he')]))
    utt_4.set_label_list(LabelList(audiomate.corpus.LL_WORD_TRANSCRIPT,
                                   labels=[Label('who are they')]))
    utt_5.set_label_list(LabelList(audiomate.corpus.LL_WORD_TRANSCRIPT,
                                   labels=[Label('who is she')]))

    train_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs={'utt-1', 'utt-2', 'utt-3'})
    sv_train = subview.Subview(ds, filter_criteria=[train_filter])

    dev_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs={'utt-4', 'utt-5'})
    sv_dev = subview.Subview(ds, filter_criteria=[dev_filter])

    ds.import_subview('train', sv_train)
    ds.import_subview('dev', sv_dev)

    ds.new_feature_container('mfcc', '/some/dummy/path')
    ds.new_feature_container('mel', '/some/dummy/path_mel')

    return ds
def test_serialize_inverse(self):
    f = subview.MatchingUtteranceIdxFilter(utterance_idxs={'a', 'b', 'd'}, inverse=True)

    assert f.serialize() == 'exclude,a,b,d'
def test_serialize(self):
    f = subview.MatchingUtteranceIdxFilter(utterance_idxs={'a', 'b', 'd'})

    assert f.serialize() == 'include,a,b,d'
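# Note: both serialize tests pass unordered sets but assert fixed strings,
# which implies serialize() emits the utterance-ids in sorted order; otherwise
# 'include,a,b,d' could not be asserted deterministically for {'a', 'b', 'd'}.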
def setUp(self):
    utt_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs={'utt-1', 'utt-3'})
    self.corpus = resources.create_dataset()
    self.subview = subview.Subview(self.corpus, filter_criteria=[utt_filter])
def sample_subview():
    utt_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs={'utt-1', 'utt-3'})
    corpus = resources.create_dataset()
    return subview.Subview(corpus, filter_criteria=[utt_filter])
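# Hedged usage sketch: assuming sample_subview is registered as a pytest
# fixture (the decorator is not shown here), a test can request it by name.
# The test below is illustrative, not part of the original suite.
def test_sample_subview_selected_utts(sample_subview):
    assert set(sample_subview.utterances.keys()) == {'utt-1', 'utt-3'}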